relaton-itu 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/Gemfile.lock +7 -8
- data/appveyor.yml +1 -0
- data/lib/relaton_itu/hit_collection.rb +4 -2
- data/lib/relaton_itu/itu_bibliography.rb +3 -2
- data/lib/relaton_itu/scrapper.rb +41 -27
- data/lib/relaton_itu/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0e53b2a711cf2600a246b1ef35dff524624ae266
|
|
4
|
+
data.tar.gz: 5b0234760082b97040111b7fe18814977e28fd6b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a3dbf304c78838b895c6da7e869b70bbef5b60f00f12498b0a5101761d052828ebf8e45215e33d728d071cfbf9b536857826270ea09e71ca4e6bef3c9070cd9b
|
|
7
|
+
data.tar.gz: fde81735c358c19beb765da48726306c7ff020f9f02d66bf1204b9d687b73bf6ff707d6c70727738afa56200312623e36fab33e21310eec49b3a280dcd6c4411
|
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
relaton-itu (0.2.2)
|
|
4
|
+
relaton-itu (0.2.3)
|
|
5
5
|
relaton-iso-bib (~> 0.2.0)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
@@ -24,9 +24,9 @@ GEM
|
|
|
24
24
|
isoics (0.1.7)
|
|
25
25
|
json (2.2.0)
|
|
26
26
|
method_source (0.9.2)
|
|
27
|
-
mini_portile2 (2.
|
|
28
|
-
nokogiri (1.
|
|
29
|
-
mini_portile2 (~> 2.
|
|
27
|
+
mini_portile2 (2.4.0)
|
|
28
|
+
nokogiri (1.10.3)
|
|
29
|
+
mini_portile2 (~> 2.4.0)
|
|
30
30
|
pry (0.12.2)
|
|
31
31
|
coderay (~> 1.1.0)
|
|
32
32
|
method_source (~> 0.9.0)
|
|
@@ -35,12 +35,11 @@ GEM
|
|
|
35
35
|
pry (~> 0.10)
|
|
36
36
|
public_suffix (3.1.1)
|
|
37
37
|
rake (10.5.0)
|
|
38
|
-
relaton-bib (0.2.
|
|
38
|
+
relaton-bib (0.2.5)
|
|
39
39
|
addressable
|
|
40
|
-
nokogiri (~> 1.
|
|
41
|
-
relaton-iso-bib (0.2.
|
|
40
|
+
nokogiri (~> 1.10)
|
|
41
|
+
relaton-iso-bib (0.2.4)
|
|
42
42
|
isoics (~> 0.1.6)
|
|
43
|
-
nokogiri (~> 1.8.4)
|
|
44
43
|
relaton-bib (~> 0.2.0)
|
|
45
44
|
ruby_deep_clone (~> 0.8.0)
|
|
46
45
|
rspec (3.8.0)
|
data/appveyor.yml
CHANGED
|
@@ -28,6 +28,7 @@ module RelatonItu
|
|
|
28
28
|
# from = Date.strptime year, "%Y"
|
|
29
29
|
# to = from.next_year.prev_day
|
|
30
30
|
# end
|
|
31
|
+
group = %r{(OB|Operational Bulletin) No} =~ text ? "Publications" : "Recommendations"
|
|
31
32
|
url = "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
|
|
32
33
|
params = {
|
|
33
34
|
"Input" => ref_nbr,
|
|
@@ -36,7 +37,7 @@ module RelatonItu
|
|
|
36
37
|
"SortBy" => "RELEVANCE",
|
|
37
38
|
"ExactPhrase" => false,
|
|
38
39
|
"CollectionName" => "General",
|
|
39
|
-
"CollectionGroup" =>
|
|
40
|
+
"CollectionGroup" => group,
|
|
40
41
|
"Sector" => "t",
|
|
41
42
|
"Criterias" => [{
|
|
42
43
|
"Name" => "Search in",
|
|
@@ -82,7 +83,8 @@ module RelatonItu
|
|
|
82
83
|
code = h["Media"]["Name"]
|
|
83
84
|
title = h["Title"]
|
|
84
85
|
url = h["Redirection"]
|
|
85
|
-
|
|
86
|
+
type = group.downcase[0...-1]
|
|
87
|
+
Hit.new({ code: code, title: title, url: url, type: type }, self)
|
|
86
88
|
end
|
|
87
89
|
concat hits
|
|
88
90
|
@fetched = false
|
|
@@ -73,12 +73,13 @@ module RelatonItu
|
|
|
73
73
|
end
|
|
74
74
|
|
|
75
75
|
def search_filter(code)
|
|
76
|
-
docidrx = %r{^ITU-T\s[^\s]+}
|
|
76
|
+
docidrx = %r{\w+.\d+} # %r{^ITU-T\s[^\s]+}
|
|
77
|
+
c = code.match(docidrx).to_s
|
|
77
78
|
warn "fetching #{code}..."
|
|
78
79
|
result = search(code)
|
|
79
80
|
result.select do |i|
|
|
80
81
|
i.hit[:code] &&
|
|
81
|
-
i.hit[:code].match(docidrx).to_s ==
|
|
82
|
+
i.hit[:code].match(docidrx).to_s == c
|
|
82
83
|
end
|
|
83
84
|
end
|
|
84
85
|
|
data/lib/relaton_itu/scrapper.rb
CHANGED
|
@@ -13,6 +13,7 @@ module RelatonItu
|
|
|
13
13
|
# rubocop:disable Metrics/ModuleLength
|
|
14
14
|
module Scrapper
|
|
15
15
|
DOMAIN = "https://www.itu.int"
|
|
16
|
+
ROMAN_MONTHS = %w[I II III IV V VI VII VIII IX X XI XII].freeze
|
|
16
17
|
|
|
17
18
|
TYPES = {
|
|
18
19
|
"ISO" => "international-standard",
|
|
@@ -50,7 +51,7 @@ module RelatonItu
|
|
|
50
51
|
doc = get_page hit_data[:url]
|
|
51
52
|
|
|
52
53
|
# Fetch edition.
|
|
53
|
-
edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b")
|
|
54
|
+
edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b")&.text
|
|
54
55
|
|
|
55
56
|
ItuBibliographicItem.new(
|
|
56
57
|
fetched: Date.today.to_s,
|
|
@@ -59,7 +60,7 @@ module RelatonItu
|
|
|
59
60
|
language: ["en"],
|
|
60
61
|
script: ["Latn"],
|
|
61
62
|
titles: fetch_titles(hit_data),
|
|
62
|
-
type:
|
|
63
|
+
type: hit_data[:type],
|
|
63
64
|
docstatus: fetch_status(doc),
|
|
64
65
|
ics: [], # fetch_ics(doc),
|
|
65
66
|
dates: fetch_dates(doc),
|
|
@@ -138,34 +139,26 @@ module RelatonItu
|
|
|
138
139
|
|
|
139
140
|
# Fetch status.
|
|
140
141
|
# @param doc [Nokogiri::HTML::Document]
|
|
141
|
-
# @
|
|
142
|
-
# @return [Hash]
|
|
142
|
+
# @return [RelatonBib::DocumentStatus, NilClass]
|
|
143
143
|
def fetch_status(doc)
|
|
144
|
-
s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]")
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
# substage = "60"
|
|
149
|
-
else
|
|
150
|
-
status = "Withdrawal"
|
|
151
|
-
# stage = "95"
|
|
152
|
-
# substage = "99"
|
|
153
|
-
end
|
|
144
|
+
s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]")
|
|
145
|
+
return unless s
|
|
146
|
+
|
|
147
|
+
status = s.text == "In force" ? "Published" : "Withdrawal"
|
|
154
148
|
RelatonBib::DocumentStatus.new(stage: status)
|
|
155
149
|
end
|
|
156
150
|
|
|
157
151
|
# Fetch workgroup.
|
|
158
152
|
# @param doc [Nokogiri::HTML::Document]
|
|
159
|
-
# @return [RelatonItu::EditorialGroup]
|
|
153
|
+
# @return [RelatonItu::EditorialGroup, NilClass]
|
|
160
154
|
def fetch_workgroup(doc)
|
|
161
|
-
wg = doc.at('//table/tr/td/span[contains(@id, "Label8")]/a')
|
|
155
|
+
wg = doc.at('//table/tr/td/span[contains(@id, "Label8")]/a')
|
|
156
|
+
return unless wg
|
|
157
|
+
|
|
158
|
+
workgroup = wg.text
|
|
162
159
|
EditorialGroup.new(
|
|
163
|
-
bureau:
|
|
164
|
-
group: itugroup(
|
|
165
|
-
# name: "International Telecommunication Union",
|
|
166
|
-
# abbreviation: "ITU",
|
|
167
|
-
# url: "www.itu.int",
|
|
168
|
-
# technical_committee: tc,
|
|
160
|
+
bureau: workgroup.match(/(?<=-)./).to_s,
|
|
161
|
+
group: itugroup(workgroup),
|
|
169
162
|
)
|
|
170
163
|
end
|
|
171
164
|
|
|
@@ -209,9 +202,9 @@ module RelatonItu
|
|
|
209
202
|
# Fetch type.
|
|
210
203
|
# @param doc [Nokogiri::HTML::Document]
|
|
211
204
|
# @return [String]
|
|
212
|
-
def fetch_type(_doc)
|
|
213
|
-
|
|
214
|
-
end
|
|
205
|
+
# def fetch_type(_doc)
|
|
206
|
+
# "recommendation"
|
|
207
|
+
# end
|
|
215
208
|
|
|
216
209
|
# Fetch titles.
|
|
217
210
|
# @param hit_data [Hash]
|
|
@@ -248,13 +241,33 @@ module RelatonItu
|
|
|
248
241
|
# @return [Array<Hash>]
|
|
249
242
|
def fetch_dates(doc)
|
|
250
243
|
dates = []
|
|
251
|
-
|
|
244
|
+
pdate = doc.at("//table/tr/td/span[contains(@id, 'Label5')]")
|
|
245
|
+
publish_date = pdate&.text || ob_date(doc)
|
|
252
246
|
unless publish_date.empty?
|
|
253
247
|
dates << { type: "published", on: publish_date }
|
|
254
248
|
end
|
|
255
249
|
dates
|
|
256
250
|
end
|
|
257
251
|
|
|
252
|
+
# Scrape Operational Bulletin date.
|
|
253
|
+
# @param doc [Nokogiri::HTML::Document]
|
|
254
|
+
# @return [String]
|
|
255
|
+
def ob_date(doc)
|
|
256
|
+
pdate = doc.at('//table/tbody/tr/td[contains(text(), "Year:")]')
|
|
257
|
+
return unless pdate
|
|
258
|
+
|
|
259
|
+
roman_to_arabic pdate.text.match(%r{(?<=Year: )\d{2}.\w+.\d{4}}).to_s
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
# Convert roman month number in string date to arabic number
|
|
263
|
+
# @param date [String]
|
|
264
|
+
# @return [String]
|
|
265
|
+
def roman_to_arabic(date)
|
|
266
|
+
%r{(?<rmonth>[IVX]+)} =~ date
|
|
267
|
+
month = ROMAN_MONTHS.index(rmonth) + 1
|
|
268
|
+
Date.parse(date.sub(%r{[IVX]+}, month.to_s)).to_s
|
|
269
|
+
end
|
|
270
|
+
|
|
258
271
|
# Fetch contributors
|
|
259
272
|
# @param doc [Nokogiri::HTML::Document]
|
|
260
273
|
# @return [Array<Hash>]
|
|
@@ -300,7 +313,8 @@ module RelatonItu
|
|
|
300
313
|
name = "International Telecommunication Union"
|
|
301
314
|
url = "www.itu.int"
|
|
302
315
|
end
|
|
303
|
-
|
|
316
|
+
fdate = doc.at("//table/tr/td/span[contains(@id, 'Label5')]")
|
|
317
|
+
from = fdate&.text || ob_date(doc)
|
|
304
318
|
{ owner: { name: name, abbreviation: abbreviation, url: url }, from: from }
|
|
305
319
|
end
|
|
306
320
|
end
|
data/lib/relaton_itu/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-itu
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.2
|
|
4
|
+
version: 0.2.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2019-07-
|
|
11
|
+
date: 2019-07-22 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|