relaton-itu 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/Gemfile.lock +7 -8
- data/appveyor.yml +1 -0
- data/lib/relaton_itu/hit_collection.rb +4 -2
- data/lib/relaton_itu/itu_bibliography.rb +3 -2
- data/lib/relaton_itu/scrapper.rb +41 -27
- data/lib/relaton_itu/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0e53b2a711cf2600a246b1ef35dff524624ae266
|
4
|
+
data.tar.gz: 5b0234760082b97040111b7fe18814977e28fd6b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a3dbf304c78838b895c6da7e869b70bbef5b60f00f12498b0a5101761d052828ebf8e45215e33d728d071cfbf9b536857826270ea09e71ca4e6bef3c9070cd9b
|
7
|
+
data.tar.gz: fde81735c358c19beb765da48726306c7ff020f9f02d66bf1204b9d687b73bf6ff707d6c70727738afa56200312623e36fab33e21310eec49b3a280dcd6c4411
|
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
relaton-itu (0.2.2)
|
4
|
+
relaton-itu (0.2.3)
|
5
5
|
relaton-iso-bib (~> 0.2.0)
|
6
6
|
|
7
7
|
GEM
|
@@ -24,9 +24,9 @@ GEM
|
|
24
24
|
isoics (0.1.7)
|
25
25
|
json (2.2.0)
|
26
26
|
method_source (0.9.2)
|
27
|
-
mini_portile2 (2.
|
28
|
-
nokogiri (1.
|
29
|
-
mini_portile2 (~> 2.
|
27
|
+
mini_portile2 (2.4.0)
|
28
|
+
nokogiri (1.10.3)
|
29
|
+
mini_portile2 (~> 2.4.0)
|
30
30
|
pry (0.12.2)
|
31
31
|
coderay (~> 1.1.0)
|
32
32
|
method_source (~> 0.9.0)
|
@@ -35,12 +35,11 @@ GEM
|
|
35
35
|
pry (~> 0.10)
|
36
36
|
public_suffix (3.1.1)
|
37
37
|
rake (10.5.0)
|
38
|
-
relaton-bib (0.2.
|
38
|
+
relaton-bib (0.2.5)
|
39
39
|
addressable
|
40
|
-
nokogiri (~> 1.
|
41
|
-
relaton-iso-bib (0.2.
|
40
|
+
nokogiri (~> 1.10)
|
41
|
+
relaton-iso-bib (0.2.4)
|
42
42
|
isoics (~> 0.1.6)
|
43
|
-
nokogiri (~> 1.8.4)
|
44
43
|
relaton-bib (~> 0.2.0)
|
45
44
|
ruby_deep_clone (~> 0.8.0)
|
46
45
|
rspec (3.8.0)
|
data/appveyor.yml
CHANGED
data/lib/relaton_itu/hit_collection.rb
CHANGED
@@ -28,6 +28,7 @@ module RelatonItu
|
|
28
28
|
# from = Date.strptime year, "%Y"
|
29
29
|
# to = from.next_year.prev_day
|
30
30
|
# end
|
31
|
+
group = %r{(OB|Operational Bulletin) No} =~ text ? "Publications" : "Recommendations"
|
31
32
|
url = "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
|
32
33
|
params = {
|
33
34
|
"Input" => ref_nbr,
|
@@ -36,7 +37,7 @@ module RelatonItu
|
|
36
37
|
"SortBy" => "RELEVANCE",
|
37
38
|
"ExactPhrase" => false,
|
38
39
|
"CollectionName" => "General",
|
39
|
-
"CollectionGroup" =>
|
40
|
+
"CollectionGroup" => group,
|
40
41
|
"Sector" => "t",
|
41
42
|
"Criterias" => [{
|
42
43
|
"Name" => "Search in",
|
@@ -82,7 +83,8 @@ module RelatonItu
|
|
82
83
|
code = h["Media"]["Name"]
|
83
84
|
title = h["Title"]
|
84
85
|
url = h["Redirection"]
|
85
|
-
|
86
|
+
type = group.downcase[0...-1]
|
87
|
+
Hit.new({ code: code, title: title, url: url, type: type }, self)
|
86
88
|
end
|
87
89
|
concat hits
|
88
90
|
@fetched = false
|
data/lib/relaton_itu/itu_bibliography.rb
CHANGED
@@ -73,12 +73,13 @@ module RelatonItu
|
|
73
73
|
end
|
74
74
|
|
75
75
|
def search_filter(code)
|
76
|
-
docidrx = %r{^ITU-T\s[^\s]+}
|
76
|
+
docidrx = %r{\w+.\d+} # %r{^ITU-T\s[^\s]+}
|
77
|
+
c = code.match(docidrx).to_s
|
77
78
|
warn "fetching #{code}..."
|
78
79
|
result = search(code)
|
79
80
|
result.select do |i|
|
80
81
|
i.hit[:code] &&
|
81
|
-
i.hit[:code].match(docidrx).to_s ==
|
82
|
+
i.hit[:code].match(docidrx).to_s == c
|
82
83
|
end
|
83
84
|
end
|
84
85
|
|
data/lib/relaton_itu/scrapper.rb
CHANGED
@@ -13,6 +13,7 @@ module RelatonItu
|
|
13
13
|
# rubocop:disable Metrics/ModuleLength
|
14
14
|
module Scrapper
|
15
15
|
DOMAIN = "https://www.itu.int"
|
16
|
+
ROMAN_MONTHS = %w[I II III IV V VI VII VIII IX X XI XII].freeze
|
16
17
|
|
17
18
|
TYPES = {
|
18
19
|
"ISO" => "international-standard",
|
@@ -50,7 +51,7 @@ module RelatonItu
|
|
50
51
|
doc = get_page hit_data[:url]
|
51
52
|
|
52
53
|
# Fetch edition.
|
53
|
-
edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b")
|
54
|
+
edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b")&.text
|
54
55
|
|
55
56
|
ItuBibliographicItem.new(
|
56
57
|
fetched: Date.today.to_s,
|
@@ -59,7 +60,7 @@ module RelatonItu
|
|
59
60
|
language: ["en"],
|
60
61
|
script: ["Latn"],
|
61
62
|
titles: fetch_titles(hit_data),
|
62
|
-
type:
|
63
|
+
type: hit_data[:type],
|
63
64
|
docstatus: fetch_status(doc),
|
64
65
|
ics: [], # fetch_ics(doc),
|
65
66
|
dates: fetch_dates(doc),
|
@@ -138,34 +139,26 @@ module RelatonItu
|
|
138
139
|
|
139
140
|
# Fetch status.
|
140
141
|
# @param doc [Nokogiri::HTML::Document]
|
141
|
-
# @
|
142
|
-
# @return [Hash]
|
142
|
+
# @return [RelatonBib::DocumentStatus, NilClass]
|
143
143
|
def fetch_status(doc)
|
144
|
-
s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]")
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
# substage = "60"
|
149
|
-
else
|
150
|
-
status = "Withdrawal"
|
151
|
-
# stage = "95"
|
152
|
-
# substage = "99"
|
153
|
-
end
|
144
|
+
s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]")
|
145
|
+
return unless s
|
146
|
+
|
147
|
+
status = s.text == "In force" ? "Published" : "Withdrawal"
|
154
148
|
RelatonBib::DocumentStatus.new(stage: status)
|
155
149
|
end
|
156
150
|
|
157
151
|
# Fetch workgroup.
|
158
152
|
# @param doc [Nokogiri::HTML::Document]
|
159
|
-
# @return [RelatonItu::EditorialGroup]
|
153
|
+
# @return [RelatonItu::EditorialGroup, NilClass]
|
160
154
|
def fetch_workgroup(doc)
|
161
|
-
wg = doc.at('//table/tr/td/span[contains(@id, "Label8")]/a')
|
155
|
+
wg = doc.at('//table/tr/td/span[contains(@id, "Label8")]/a')
|
156
|
+
return unless wg
|
157
|
+
|
158
|
+
workgroup = wg.text
|
162
159
|
EditorialGroup.new(
|
163
|
-
bureau:
|
164
|
-
group: itugroup(
|
165
|
-
# name: "International Telecommunication Union",
|
166
|
-
# abbreviation: "ITU",
|
167
|
-
# url: "www.itu.int",
|
168
|
-
# technical_committee: tc,
|
160
|
+
bureau: workgroup.match(/(?<=-)./).to_s,
|
161
|
+
group: itugroup(workgroup),
|
169
162
|
)
|
170
163
|
end
|
171
164
|
|
@@ -209,9 +202,9 @@ module RelatonItu
|
|
209
202
|
# Fetch type.
|
210
203
|
# @param doc [Nokogiri::HTML::Document]
|
211
204
|
# @return [String]
|
212
|
-
def fetch_type(_doc)
|
213
|
-
|
214
|
-
end
|
205
|
+
# def fetch_type(_doc)
|
206
|
+
# "recommendation"
|
207
|
+
# end
|
215
208
|
|
216
209
|
# Fetch titles.
|
217
210
|
# @param hit_data [Hash]
|
@@ -248,13 +241,33 @@ module RelatonItu
|
|
248
241
|
# @return [Array<Hash>]
|
249
242
|
def fetch_dates(doc)
|
250
243
|
dates = []
|
251
|
-
|
244
|
+
pdate = doc.at("//table/tr/td/span[contains(@id, 'Label5')]")
|
245
|
+
publish_date = pdate&.text || ob_date(doc)
|
252
246
|
unless publish_date.empty?
|
253
247
|
dates << { type: "published", on: publish_date }
|
254
248
|
end
|
255
249
|
dates
|
256
250
|
end
|
257
251
|
|
252
|
+
# Scrape Operational Bulletin date.
|
253
|
+
# @param doc [Nokogiri::HTML::Document]
|
254
|
+
# @return [String]
|
255
|
+
def ob_date(doc)
|
256
|
+
pdate = doc.at('//table/tbody/tr/td[contains(text(), "Year:")]')
|
257
|
+
return unless pdate
|
258
|
+
|
259
|
+
roman_to_arabic pdate.text.match(%r{(?<=Year: )\d{2}.\w+.\d{4}}).to_s
|
260
|
+
end
|
261
|
+
|
262
|
+
# Convert roman month number in string date to arabic number
|
263
|
+
# @param date [String]
|
264
|
+
# @return [String]
|
265
|
+
def roman_to_arabic(date)
|
266
|
+
%r{(?<rmonth>[IVX]+)} =~ date
|
267
|
+
month = ROMAN_MONTHS.index(rmonth) + 1
|
268
|
+
Date.parse(date.sub(%r{[IVX]+}, month.to_s)).to_s
|
269
|
+
end
|
270
|
+
|
258
271
|
# Fetch contributors
|
259
272
|
# @param doc [Nokogiri::HTML::Document]
|
260
273
|
# @return [Array<Hash>]
|
@@ -300,7 +313,8 @@ module RelatonItu
|
|
300
313
|
name = "International Telecommunication Union"
|
301
314
|
url = "www.itu.int"
|
302
315
|
end
|
303
|
-
|
316
|
+
fdate = doc.at("//table/tr/td/span[contains(@id, 'Label5')]")
|
317
|
+
from = fdate&.text || ob_date(doc)
|
304
318
|
{ owner: { name: name, abbreviation: abbreviation, url: url }, from: from }
|
305
319
|
end
|
306
320
|
end
|
data/lib/relaton_itu/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-itu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-07-
|
11
|
+
date: 2019-07-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|