relaton-itu 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 06af239fca985e01b1bf84c15ef3353adba66888
4
- data.tar.gz: 07f56fe72469d205e2da95cf4e6c17c5d83742ad
3
+ metadata.gz: 0e53b2a711cf2600a246b1ef35dff524624ae266
4
+ data.tar.gz: 5b0234760082b97040111b7fe18814977e28fd6b
5
5
  SHA512:
6
- metadata.gz: a1b4e86236bc6ad52ab08366e03ac045f846ba56bd5b962852b95b9152bfcd21e1b23065388abb5635278ff37d1fc326d3cfe4c2037657a8af0d9e342ada6d01
7
- data.tar.gz: 907809fe72f0293cfa0f0f9194bee933b61f1ced2c75010f6acaa92d29b5f3b3851b5a16d955d6e1b5caf3b5478e7e736b4d18897356b082089b83d48481e896
6
+ metadata.gz: a3dbf304c78838b895c6da7e869b70bbef5b60f00f12498b0a5101761d052828ebf8e45215e33d728d071cfbf9b536857826270ea09e71ca4e6bef3c9070cd9b
7
+ data.tar.gz: fde81735c358c19beb765da48726306c7ff020f9f02d66bf1204b9d687b73bf6ff707d6c70727738afa56200312623e36fab33e21310eec49b3a280dcd6c4411
data/.travis.yml CHANGED
@@ -6,6 +6,7 @@ os:
6
6
  - linux
7
7
  - osx
8
8
  rvm:
9
+ - 2.6
9
10
  - 2.5
10
11
  - 2.4
11
12
  - ruby-head
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- relaton-itu (0.2.2)
4
+ relaton-itu (0.2.3)
5
5
  relaton-iso-bib (~> 0.2.0)
6
6
 
7
7
  GEM
@@ -24,9 +24,9 @@ GEM
24
24
  isoics (0.1.7)
25
25
  json (2.2.0)
26
26
  method_source (0.9.2)
27
- mini_portile2 (2.3.0)
28
- nokogiri (1.8.5)
29
- mini_portile2 (~> 2.3.0)
27
+ mini_portile2 (2.4.0)
28
+ nokogiri (1.10.3)
29
+ mini_portile2 (~> 2.4.0)
30
30
  pry (0.12.2)
31
31
  coderay (~> 1.1.0)
32
32
  method_source (~> 0.9.0)
@@ -35,12 +35,11 @@ GEM
35
35
  pry (~> 0.10)
36
36
  public_suffix (3.1.1)
37
37
  rake (10.5.0)
38
- relaton-bib (0.2.3)
38
+ relaton-bib (0.2.5)
39
39
  addressable
40
- nokogiri (~> 1.8.4)
41
- relaton-iso-bib (0.2.3)
40
+ nokogiri (~> 1.10)
41
+ relaton-iso-bib (0.2.4)
42
42
  isoics (~> 0.1.6)
43
- nokogiri (~> 1.8.4)
44
43
  relaton-bib (~> 0.2.0)
45
44
  ruby_deep_clone (~> 0.8.0)
46
45
  rspec (3.8.0)
data/appveyor.yml CHANGED
@@ -7,6 +7,7 @@ cache:
7
7
 
8
8
  environment:
9
9
  matrix:
10
+ - RUBY_VERSION: 26
10
11
  - RUBY_VERSION: 25
11
12
  - RUBY_VERSION: 24
12
13
  - RUBY_VERSION: _trunk
@@ -28,6 +28,7 @@ module RelatonItu
28
28
  # from = Date.strptime year, "%Y"
29
29
  # to = from.next_year.prev_day
30
30
  # end
31
+ group = %r{(OB|Operational Bulletin) No} =~ text ? "Publications" : "Recommendations"
31
32
  url = "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
32
33
  params = {
33
34
  "Input" => ref_nbr,
@@ -36,7 +37,7 @@ module RelatonItu
36
37
  "SortBy" => "RELEVANCE",
37
38
  "ExactPhrase" => false,
38
39
  "CollectionName" => "General",
39
- "CollectionGroup" => "Recommendations",
40
+ "CollectionGroup" => group,
40
41
  "Sector" => "t",
41
42
  "Criterias" => [{
42
43
  "Name" => "Search in",
@@ -82,7 +83,8 @@ module RelatonItu
82
83
  code = h["Media"]["Name"]
83
84
  title = h["Title"]
84
85
  url = h["Redirection"]
85
- Hit.new({ code: code, title: title, url: url }, self)
86
+ type = group.downcase[0...-1]
87
+ Hit.new({ code: code, title: title, url: url, type: type }, self)
86
88
  end
87
89
  concat hits
88
90
  @fetched = false
@@ -73,12 +73,13 @@ module RelatonItu
73
73
  end
74
74
 
75
75
  def search_filter(code)
76
- docidrx = %r{^ITU-T\s[^\s]+}
76
+ docidrx = %r{\w+.\d+} # %r{^ITU-T\s[^\s]+}
77
+ c = code.match(docidrx).to_s
77
78
  warn "fetching #{code}..."
78
79
  result = search(code)
79
80
  result.select do |i|
80
81
  i.hit[:code] &&
81
- i.hit[:code].match(docidrx).to_s == code
82
+ i.hit[:code].match(docidrx).to_s == c
82
83
  end
83
84
  end
84
85
 
@@ -13,6 +13,7 @@ module RelatonItu
13
13
  # rubocop:disable Metrics/ModuleLength
14
14
  module Scrapper
15
15
  DOMAIN = "https://www.itu.int"
16
+ ROMAN_MONTHS = %w[I II III IV V VI VII VIII IX X XI XII].freeze
16
17
 
17
18
  TYPES = {
18
19
  "ISO" => "international-standard",
@@ -50,7 +51,7 @@ module RelatonItu
50
51
  doc = get_page hit_data[:url]
51
52
 
52
53
  # Fetch edition.
53
- edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b").text
54
+ edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b")&.text
54
55
 
55
56
  ItuBibliographicItem.new(
56
57
  fetched: Date.today.to_s,
@@ -59,7 +60,7 @@ module RelatonItu
59
60
  language: ["en"],
60
61
  script: ["Latn"],
61
62
  titles: fetch_titles(hit_data),
62
- type: fetch_type(doc),
63
+ type: hit_data[:type],
63
64
  docstatus: fetch_status(doc),
64
65
  ics: [], # fetch_ics(doc),
65
66
  dates: fetch_dates(doc),
@@ -138,34 +139,26 @@ module RelatonItu
138
139
 
139
140
  # Fetch status.
140
141
  # @param doc [Nokogiri::HTML::Document]
141
- # @param status [String]
142
- # @return [Hash]
142
+ # @return [RelatonBib::DocumentStatus, NilClass]
143
143
  def fetch_status(doc)
144
- s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]").text
145
- if s == "In force"
146
- status = "Published"
147
- # stage = "60"
148
- # substage = "60"
149
- else
150
- status = "Withdrawal"
151
- # stage = "95"
152
- # substage = "99"
153
- end
144
+ s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]")
145
+ return unless s
146
+
147
+ status = s.text == "In force" ? "Published" : "Withdrawal"
154
148
  RelatonBib::DocumentStatus.new(stage: status)
155
149
  end
156
150
 
157
151
  # Fetch workgroup.
158
152
  # @param doc [Nokogiri::HTML::Document]
159
- # @return [RelatonItu::EditorialGroup]
153
+ # @return [RelatonItu::EditorialGroup, NilClass]
160
154
  def fetch_workgroup(doc)
161
- wg = doc.at('//table/tr/td/span[contains(@id, "Label8")]/a').text
155
+ wg = doc.at('//table/tr/td/span[contains(@id, "Label8")]/a')
156
+ return unless wg
157
+
158
+ workgroup = wg.text
162
159
  EditorialGroup.new(
163
- bureau: wg.match(/(?<=-)./).to_s,
164
- group: itugroup(wg),
165
- # name: "International Telecommunication Union",
166
- # abbreviation: "ITU",
167
- # url: "www.itu.int",
168
- # technical_committee: tc,
160
+ bureau: workgroup.match(/(?<=-)./).to_s,
161
+ group: itugroup(workgroup),
169
162
  )
170
163
  end
171
164
 
@@ -209,9 +202,9 @@ module RelatonItu
209
202
  # Fetch type.
210
203
  # @param doc [Nokogiri::HTML::Document]
211
204
  # @return [String]
212
- def fetch_type(_doc)
213
- "recommendation"
214
- end
205
+ # def fetch_type(_doc)
206
+ # "recommendation"
207
+ # end
215
208
 
216
209
  # Fetch titles.
217
210
  # @param hit_data [Hash]
@@ -248,13 +241,33 @@ module RelatonItu
248
241
  # @return [Array<Hash>]
249
242
  def fetch_dates(doc)
250
243
  dates = []
251
- publish_date = doc.at("//table/tr/td/span[contains(@id, 'Label5')]").text
244
+ pdate = doc.at("//table/tr/td/span[contains(@id, 'Label5')]")
245
+ publish_date = pdate&.text || ob_date(doc)
252
246
  unless publish_date.empty?
253
247
  dates << { type: "published", on: publish_date }
254
248
  end
255
249
  dates
256
250
  end
257
251
 
252
+ # Scrape Operational Bulletin date.
253
+ # @param doc [Nokogiri::HTML::Document]
254
+ # @return [String]
255
def ob_date(doc)
  pdate = doc.at('//table/tbody/tr/td[contains(text(), "Year:")]')
  return unless pdate

  # String#[] yields nil when the cell holds no "Year: dd.<month>.yyyy" date;
  # return nil in that case rather than handing an empty string to
  # roman_to_arabic.
  raw_date = pdate.text[%r{(?<=Year: )\d{2}.\w+.\d{4}}]
  return unless raw_date

  roman_to_arabic raw_date
end
261
+
262
+ # Convert roman month number in string date to arabic number
263
+ # @param date [String]
264
+ # @return [String]
265
def roman_to_arabic(date)
  rmonth = date.to_s[%r{[IVX]+}]
  # Array#index is nil when the date has no Roman numeral or one outside
  # I..XII (e.g. "XIII"); return nil instead of calling + on nil.
  month_index = ROMAN_MONTHS.index(rmonth)
  return unless month_index

  Date.parse(date.sub(%r{[IVX]+}, (month_index + 1).to_s)).to_s
end
270
+
258
271
  # Fetch contributors
259
272
  # @param doc [Nokogiri::HTML::Document]
260
273
  # @return [Array<Hash>]
@@ -300,7 +313,8 @@ module RelatonItu
300
313
  name = "International Telecommunication Union"
301
314
  url = "www.itu.int"
302
315
  end
303
- from = doc.at("//table/tr/td/span[contains(@id, 'Label5')]").text
316
+ fdate = doc.at("//table/tr/td/span[contains(@id, 'Label5')]")
317
+ from = fdate&.text || ob_date(doc)
304
318
  { owner: { name: name, abbreviation: abbreviation, url: url }, from: from }
305
319
  end
306
320
  end
@@ -1,3 +1,3 @@
1
1
  module RelatonItu
2
- VERSION = "0.2.2".freeze
2
+ VERSION = "0.2.3".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-itu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-10 00:00:00.000000000 Z
11
+ date: 2019-07-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler