relaton-itu 1.0.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/grammars/biblio.rng +89 -32
- data/grammars/isodoc.rng +450 -4
- data/lib/relaton_itu.rb +0 -5
- data/lib/relaton_itu/editorial_group.rb +6 -4
- data/lib/relaton_itu/hash_converter.rb +10 -1
- data/lib/relaton_itu/hit.rb +2 -2
- data/lib/relaton_itu/hit_collection.rb +38 -12
- data/lib/relaton_itu/itu_bibliographic_item.rb +6 -5
- data/lib/relaton_itu/itu_bibliography.rb +24 -21
- data/lib/relaton_itu/scrapper.rb +54 -103
- data/lib/relaton_itu/structured_identifier.rb +41 -0
- data/lib/relaton_itu/version.rb +1 -1
- data/lib/relaton_itu/xml_parser.rb +18 -13
- data/relaton-itu.gemspec +2 -2
- metadata +6 -6
- data/grammars/isostandard.rng +0 -522
data/lib/relaton_itu.rb
CHANGED
@@ -2,11 +2,6 @@ require "relaton_itu/version"
|
|
2
2
|
require "relaton_itu/itu_bibliography"
|
3
3
|
require "digest/md5"
|
4
4
|
|
5
|
-
# if defined? Relaton
|
6
|
-
# require_relative "relaton/processor"
|
7
|
-
# Relaton::Registry.instance.register(Relaton::RelatonItu::Processor)
|
8
|
-
# end
|
9
|
-
|
10
5
|
module RelatonItu
|
11
6
|
class Error < StandardError; end
|
12
7
|
|
@@ -16,8 +16,9 @@ module RelatonItu
|
|
16
16
|
# @param subgroup [Hash, RelatonItu::ItuGroup, NilClass]
|
17
17
|
# @param workgroup [Hash, RelatonItu::ItuGroup, NilClass]
|
18
18
|
def initialize(bureau:, group:, subgroup: nil, workgroup: nil)
|
19
|
-
|
20
|
-
|
19
|
+
unless BUREAUS.include? bureau
|
20
|
+
warn "[relaton-itu] WARNING: invalid bureau: #{bureau}"
|
21
|
+
end
|
21
22
|
@bureau = bureau
|
22
23
|
@group = group.is_a?(Hash) ? ItuGroup.new(group) : group
|
23
24
|
@subgroup = subgroup.is_a?(Hash) ? ItuGroup.new(subgroup) : subgroup
|
@@ -28,7 +29,7 @@ module RelatonItu
|
|
28
29
|
def to_xml(builder)
|
29
30
|
builder.editorialgroup do
|
30
31
|
builder.bureau bureau
|
31
|
-
builder.group { |b| group.to_xml b }
|
32
|
+
builder.group { |b| group.to_xml b } if group
|
32
33
|
builder.subgroup { |b| group.to_xml b } if subgroup
|
33
34
|
builder.workgroup { |b| group.to_xml b } if workgroup
|
34
35
|
end
|
@@ -36,7 +37,8 @@ module RelatonItu
|
|
36
37
|
|
37
38
|
# @return [Hash]
|
38
39
|
def to_hash
|
39
|
-
hash = { "bureau" => bureau
|
40
|
+
hash = { "bureau" => bureau }
|
41
|
+
hash["group"] = group.to_hash if group
|
40
42
|
hash["subgroup"] = subgroup.to_hash if subgroup
|
41
43
|
hash["workgroup"] = workgroup.to_hash if workgroup
|
42
44
|
hash
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module RelatonItu
|
2
|
-
class HashConverter <
|
2
|
+
class HashConverter < RelatonBib::HashConverter
|
3
3
|
class << self
|
4
4
|
private
|
5
5
|
|
@@ -9,6 +9,15 @@ module RelatonItu
|
|
9
9
|
|
10
10
|
ret[:editorialgroup] = EditorialGroup.new eg
|
11
11
|
end
|
12
|
+
|
13
|
+
# @param ret [Hash]
|
14
|
+
def structuredidentifier_hash_to_bib(ret)
|
15
|
+
return unless ret[:structuredidentifier]
|
16
|
+
|
17
|
+
ret[:structuredidentifier] = StructuredIdentifier.new(
|
18
|
+
ret[:structuredidentifier],
|
19
|
+
)
|
20
|
+
end
|
12
21
|
end
|
13
22
|
end
|
14
23
|
end
|
data/lib/relaton_itu/hit.rb
CHANGED
@@ -4,9 +4,9 @@ module RelatonItu
|
|
4
4
|
# Hit.
|
5
5
|
class Hit < RelatonBib::Hit
|
6
6
|
# Parse page.
|
7
|
-
# @return [
|
7
|
+
# @return [RelatonItu::ItuBibliographicItem]
|
8
8
|
def fetch
|
9
|
-
@fetch ||= Scrapper.parse_page
|
9
|
+
@fetch ||= Scrapper.parse_page hit, hit_collection.gi_imp
|
10
10
|
end
|
11
11
|
end
|
12
12
|
end
|
@@ -7,16 +7,39 @@ require "net/http"
|
|
7
7
|
module RelatonItu
|
8
8
|
# Page of hit collection.
|
9
9
|
class HitCollection < RelatonBib::HitCollection
|
10
|
-
DOMAIN = "https://www.itu.int"
|
10
|
+
DOMAIN = "https://www.itu.int"
|
11
11
|
|
12
|
-
# @
|
12
|
+
# @return [TrueClass, FalseClass]
|
13
|
+
attr_reader :gi_imp
|
14
|
+
|
15
|
+
# @param ref [String]
|
13
16
|
# @param year [String]
|
14
|
-
def initialize(
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
def initialize(ref, year = nil)
|
18
|
+
text = ref.sub /(?<=\.)Imp\s?(?=\d)/, ""
|
19
|
+
super text, year
|
20
|
+
@gi_imp = /\.Imp\d/.match?(ref)
|
21
|
+
uri = URI "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
|
22
|
+
data = { json: params.to_json }
|
23
|
+
resp = Net::HTTP.post(uri, data.to_json,
|
24
|
+
"Content-Type" => "application/json")
|
25
|
+
@array = hits JSON.parse(resp.body)
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
# @return [String]
|
31
|
+
def group
|
32
|
+
@group ||= if %r{(OB|Operational Bulletin) No} =~ text then "Publications"
|
33
|
+
else "Recommendations"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# rubocop:disable Metrics/MethodLength
|
38
|
+
|
39
|
+
# @return [Hash]
|
40
|
+
def params
|
41
|
+
{
|
42
|
+
"Input" => text,
|
20
43
|
"Start" => 0,
|
21
44
|
"Rows" => 10,
|
22
45
|
"SortBy" => "RELEVANCE",
|
@@ -61,10 +84,13 @@ module RelatonItu
|
|
61
84
|
"IP" => "",
|
62
85
|
"SearchType" => "All",
|
63
86
|
}
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
87
|
+
end
|
88
|
+
# rubocop:enable Metrics/MethodLength
|
89
|
+
|
90
|
+
# @param data [Hash]
|
91
|
+
# @return [Array<RelatonItu::Hit>]
|
92
|
+
def hits(data)
|
93
|
+
data["results"].map do |h|
|
68
94
|
code = h["Media"]["Name"]
|
69
95
|
title = h["Title"]
|
70
96
|
url = h["Redirection"]
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module RelatonItu
|
2
|
-
class ItuBibliographicItem <
|
2
|
+
class ItuBibliographicItem < RelatonBib::BibliographicItem
|
3
3
|
TYPES = %w[
|
4
4
|
recommendation recommendation-supplement recommendation-amendment
|
5
5
|
recommendation-corrigendum recommendation-errata recommendation-annex
|
@@ -7,13 +7,14 @@ module RelatonItu
|
|
7
7
|
joint-itu-iso-iec
|
8
8
|
].freeze
|
9
9
|
|
10
|
+
# @params structuredidentifier [RelatonItu::StructuredIdentifier]
|
10
11
|
def initialize(**args)
|
11
|
-
@doctype = args.delete :doctype
|
12
|
-
if doctype && !TYPES.include?(doctype)
|
13
|
-
|
12
|
+
# @doctype = args.delete :doctype
|
13
|
+
if args[:doctype] && !TYPES.include?(args[:doctype])
|
14
|
+
warn "[relaton-itu] WARNING: invalid doctype: #{args[:doctype]}"
|
14
15
|
end
|
15
|
-
|
16
16
|
super
|
17
|
+
# @doctype = args[:doctype]
|
17
18
|
end
|
18
19
|
end
|
19
20
|
end
|
@@ -1,8 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "
|
3
|
+
require "relaton_bib"
|
4
4
|
require "relaton_itu/itu_bibliographic_item"
|
5
5
|
require "relaton_itu/editorial_group"
|
6
|
+
require "relaton_itu/structured_identifier"
|
6
7
|
require "relaton_itu/itu_group"
|
7
8
|
require "relaton_itu/scrapper"
|
8
9
|
require "relaton_itu/hit_collection"
|
@@ -19,9 +20,9 @@ module RelatonItu
|
|
19
20
|
# @return [RelatonItu::HitCollection]
|
20
21
|
def search(text, year = nil)
|
21
22
|
HitCollection.new text, year
|
22
|
-
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
23
|
-
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
24
|
-
OpenSSL::SSL::SSLError
|
23
|
+
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
24
|
+
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
25
|
+
Net::ProtocolError, OpenSSL::SSL::SSLError
|
25
26
|
raise RelatonBib::RequestError, "Could not access http://www.itu.int"
|
26
27
|
end
|
27
28
|
|
@@ -66,17 +67,17 @@ module RelatonItu
|
|
66
67
|
nil
|
67
68
|
end
|
68
69
|
|
69
|
-
def fetch_pages(hits, threads)
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
end
|
70
|
+
# def fetch_pages(hits, threads)
|
71
|
+
# workers = RelatonBib::WorkersPool.new threads
|
72
|
+
# workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
|
73
|
+
# hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
|
74
|
+
# workers.end
|
75
|
+
# workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
|
76
|
+
# end
|
76
77
|
|
77
78
|
def search_filter(code)
|
78
|
-
docidrx = %r{\w
|
79
|
-
c = code.match(docidrx).to_s
|
79
|
+
docidrx = %r{\w+\.\d+|\w\sSuppl\.\s\d+} # %r{^ITU-T\s[^\s]+}
|
80
|
+
c = code.sub(/Imp\s?/, "").match(docidrx).to_s
|
80
81
|
warn "[relaton-itu] (\"#{code}\") fetching..."
|
81
82
|
result = search(code)
|
82
83
|
result.select do |i|
|
@@ -93,16 +94,18 @@ module RelatonItu
|
|
93
94
|
# If no match, returns any years which caused mismatch, for error reporting
|
94
95
|
def isobib_results_filter(result, year)
|
95
96
|
missed_years = []
|
96
|
-
result.each_slice(3) do |s| # ISO website only allows 3 connections
|
97
|
-
|
98
|
-
|
97
|
+
# result.each_slice(3) do |s| # ISO website only allows 3 connections
|
98
|
+
# fetch_pages(s, 3).each do |r|
|
99
|
+
result.each do |r|
|
100
|
+
return { ret: r.fetch } if !year
|
99
101
|
|
100
|
-
|
101
|
-
|
102
|
+
/\(\d{2}\/(?<pyear>\d{4})\)/ =~ r.hit[:code]
|
103
|
+
# r.date.select { |d| d.type == "published" }.each do |d|
|
104
|
+
return { ret: r.fetch } if year == pyear
|
102
105
|
|
103
|
-
|
104
|
-
|
105
|
-
end
|
106
|
+
missed_years << pyear
|
107
|
+
# end
|
108
|
+
# end
|
106
109
|
end
|
107
110
|
{ years: missed_years }
|
108
111
|
end
|
data/lib/relaton_itu/scrapper.rb
CHANGED
@@ -3,16 +3,9 @@
|
|
3
3
|
require "nokogiri"
|
4
4
|
require "net/http"
|
5
5
|
|
6
|
-
# Capybara.request_driver :poltergeist do |app|
|
7
|
-
# Capybara::Poltergeist::Driver.new app, js_errors: false
|
8
|
-
# end
|
9
|
-
# Capybara.default_driver = :poltergeist
|
10
|
-
|
11
6
|
module RelatonItu
|
12
7
|
# Scrapper.
|
13
|
-
# rubocop:disable Metrics/ModuleLength
|
14
8
|
module Scrapper
|
15
|
-
DOMAIN = "https://www.itu.int"
|
16
9
|
ROMAN_MONTHS = %w[I II III IV V VI VII VIII IX X XI XII].freeze
|
17
10
|
|
18
11
|
TYPES = {
|
@@ -31,30 +24,26 @@ module RelatonItu
|
|
31
24
|
}.freeze
|
32
25
|
|
33
26
|
class << self
|
34
|
-
#
|
35
|
-
# @return [Array<Hash>]
|
36
|
-
# def get(text)
|
37
|
-
# iso_workers = WorkersPool.new 4
|
38
|
-
# iso_workers.worker { |hit| iso_worker(hit, iso_workers) }
|
39
|
-
# algolia_workers = start_algolia_search(text, iso_workers)
|
40
|
-
# iso_docs = iso_workers.result
|
41
|
-
# algolia_workers.end
|
42
|
-
# algolia_workers.result
|
43
|
-
# iso_docs
|
44
|
-
# end
|
27
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
45
28
|
|
46
29
|
# Parse page.
|
47
|
-
# @param
|
30
|
+
# @param hit_data [Hash]
|
48
31
|
# @return [Hash]
|
49
|
-
|
50
|
-
def parse_page(hit_data)
|
32
|
+
def parse_page(hit_data, imp = false)
|
51
33
|
url, doc = get_page hit_data[:url]
|
34
|
+
if imp
|
35
|
+
a = doc.at "//span[contains(@id, 'tab_ig_uc_rec')]/a"
|
36
|
+
return unless a
|
37
|
+
|
38
|
+
url, doc = get_page URI.join(url, a[:href]).to_s
|
39
|
+
end
|
52
40
|
|
53
41
|
# Fetch edition.
|
54
42
|
edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b")&.text
|
55
43
|
|
56
44
|
ItuBibliographicItem.new(
|
57
45
|
fetched: Date.today.to_s,
|
46
|
+
type: "standard",
|
58
47
|
docid: fetch_docid(doc),
|
59
48
|
edition: edition,
|
60
49
|
language: ["en"],
|
@@ -73,7 +62,7 @@ module RelatonItu
|
|
73
62
|
place: ["Geneva"],
|
74
63
|
)
|
75
64
|
end
|
76
|
-
# rubocop:enable Metrics/AbcSize
|
65
|
+
# rubocop:enable Metrics/AbcSize
|
77
66
|
|
78
67
|
private
|
79
68
|
|
@@ -96,37 +85,23 @@ module RelatonItu
|
|
96
85
|
}]
|
97
86
|
end
|
98
87
|
|
99
|
-
# Get langs.
|
100
|
-
# @param doc [Nokogiri::HTML::Document]
|
101
|
-
# @return [Array<Hash>]
|
102
|
-
# def langs(doc)
|
103
|
-
# lgs = [{ lang: 'en' }]
|
104
|
-
# doc.css('ul#lang-switcher ul li a').each do |lang_link|
|
105
|
-
# lang_path = lang_link.attr('href')
|
106
|
-
# lang = lang_path.match(%r{^\/(fr)\/})
|
107
|
-
# lgs << { lang: lang[1], path: lang_path } if lang
|
108
|
-
# end
|
109
|
-
# lgs
|
110
|
-
# end
|
111
|
-
|
112
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
113
88
|
# Get page.
|
114
89
|
# @param path [String] page's path
|
115
|
-
# @return [Array<Nokogiri::HTML::Document
|
90
|
+
# @return [Array<String, Nokogiri::HTML::Document>]
|
116
91
|
def get_page(url)
|
117
92
|
uri = URI url
|
118
|
-
resp = Net::HTTP.get_response(uri)
|
93
|
+
resp = Net::HTTP.get_response(uri)
|
119
94
|
until resp.code == "200"
|
120
95
|
uri = URI resp["location"] if resp.code =~ /^30/
|
121
|
-
resp = Net::HTTP.get_response(uri)
|
96
|
+
resp = Net::HTTP.get_response(uri)
|
122
97
|
end
|
123
98
|
[uri.to_s, Nokogiri::HTML(resp.body)]
|
124
|
-
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
125
|
-
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
126
|
-
OpenSSL::SSL::SSLError
|
99
|
+
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
100
|
+
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
101
|
+
Net::ProtocolError, OpenSSL::SSL::SSLError
|
127
102
|
raise RelatonBib::RequestError, "Could not access #{url}"
|
128
103
|
end
|
129
|
-
# rubocop:enable Metrics/
|
104
|
+
# rubocop:enable Metrics/MethodLength
|
130
105
|
|
131
106
|
# Fetch docid.
|
132
107
|
# @param doc [Nokogiri::HTML::Document]
|
@@ -135,9 +110,11 @@ module RelatonItu
|
|
135
110
|
doc.xpath(
|
136
111
|
"//span[@id='ctl00_content_main_uc_rec_main_info1_rpt_main_ctl00_lbl_rec']",
|
137
112
|
"//td[.='Identical standard:']/following-sibling::td",
|
113
|
+
"//div/table[1]/tr[4]/td/strong",
|
138
114
|
).map do |code|
|
139
|
-
id = code.text.match(%r{^.*?(?= \()}).to_s.squeeze(" ")
|
115
|
+
id = code.text.match(%r{^.*?(?= \()|\w\.Imp\s?\d+}).to_s.squeeze(" ")
|
140
116
|
type = id.match(%r{^\w+}).to_s
|
117
|
+
type = "ITU" if type == "G"
|
141
118
|
RelatonBib::DocumentIdentifier.new(type: type, id: id)
|
142
119
|
end
|
143
120
|
end
|
@@ -146,10 +123,11 @@ module RelatonItu
|
|
146
123
|
# @param doc [Nokogiri::HTML::Document]
|
147
124
|
# @return [RelatonBib::DocumentStatus, NilClass]
|
148
125
|
def fetch_status(doc)
|
149
|
-
s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]"
|
126
|
+
s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]",
|
127
|
+
"//p[contains(.,'Status :')]")
|
150
128
|
return unless s
|
151
129
|
|
152
|
-
status = s.text
|
130
|
+
status = s.text.include?("In force") ? "Published" : "Withdrawal"
|
153
131
|
RelatonBib::DocumentStatus.new(stage: status)
|
154
132
|
end
|
155
133
|
|
@@ -191,55 +169,22 @@ module RelatonItu
|
|
191
169
|
# @return [Array<Hash>]
|
192
170
|
def fetch_relations(doc)
|
193
171
|
doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]').map do |r|
|
194
|
-
# r_type = r.at('./td/span[contains(@id, "Label4")]/nobr').text.downcase
|
195
172
|
ref = r.at('./td/span[contains(@id, "title_e")]/nobr/a')
|
196
|
-
# url = DOMAIN + ref[:href].sub(/^\./, "/ITU-T/recommendations")
|
197
173
|
fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en", script: "Latn")
|
198
|
-
bibitem =
|
174
|
+
bibitem = ItuBibliographicItem.new(formattedref: fref, type: "standard")
|
199
175
|
{ type: "complements", bibitem: bibitem }
|
200
176
|
end
|
201
177
|
end
|
202
178
|
# rubocop:enable Metrics/MethodLength
|
203
179
|
|
204
|
-
# Fetch type.
|
205
|
-
# @param doc [Nokogiri::HTML::Document]
|
206
|
-
# @return [String]
|
207
|
-
# def fetch_type(_doc)
|
208
|
-
# "recommendation"
|
209
|
-
# end
|
210
|
-
|
211
180
|
# Fetch titles.
|
212
181
|
# @param doc [Nokogiri::HTML::Document]
|
213
182
|
# @return [Array<Hash>]
|
214
183
|
def fetch_titles(doc)
|
215
|
-
|
216
|
-
# t = hit_data[:title] if t.empty?
|
217
|
-
t = doc.at("//td[@class='title']")
|
184
|
+
t = doc.at("//td[@class='title']|//div/table[1]/tr[4]/td/strong")
|
218
185
|
return [] unless t
|
219
|
-
|
220
|
-
|
221
|
-
when 0
|
222
|
-
intro, main, part = nil, "", nil
|
223
|
-
when 1
|
224
|
-
intro, main, part = nil, titles[0], nil
|
225
|
-
when 2
|
226
|
-
if /^(Part|Partie) \d+:/ =~ titles[1]
|
227
|
-
intro, main, part = nil, titles[0], titles[1]
|
228
|
-
else
|
229
|
-
intro, main, part = titles[0], titles[1], nil
|
230
|
-
end
|
231
|
-
when 3
|
232
|
-
intro, main, part = titles[0], titles[1], titles[2]
|
233
|
-
else
|
234
|
-
intro, main, part = titles[0], titles[1], titles[2..-1]&.join(" -- ")
|
235
|
-
end
|
236
|
-
[{
|
237
|
-
title_intro: intro,
|
238
|
-
title_main: main,
|
239
|
-
title_part: part,
|
240
|
-
language: "en",
|
241
|
-
script: "Latn",
|
242
|
-
}]
|
186
|
+
|
187
|
+
RelatonBib::TypedTitleString.from_string t.text, "en", "Latn"
|
243
188
|
end
|
244
189
|
|
245
190
|
# Fetch dates
|
@@ -247,10 +192,11 @@ module RelatonItu
|
|
247
192
|
# @return [Array<Hash>]
|
248
193
|
def fetch_dates(doc)
|
249
194
|
dates = []
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
195
|
+
date = doc.at("//table/tr/td/span[contains(@id, 'Label5')]",
|
196
|
+
"//p[contains(.,'Approved in')]")
|
197
|
+
pdate = date&.text&.match(/\d{4}-\d{2}-\d{2}/).to_s || ob_date(doc)
|
198
|
+
if pdate && !pdate&.empty?
|
199
|
+
dates << { type: "published", on: pdate }
|
254
200
|
end
|
255
201
|
dates
|
256
202
|
end
|
@@ -278,40 +224,45 @@ module RelatonItu
|
|
278
224
|
# @param doc [Nokogiri::HTML::Document]
|
279
225
|
# @return [Array<Hash>]
|
280
226
|
def fetch_contributors(code)
|
227
|
+
return [] unless code
|
228
|
+
|
281
229
|
abbrev = code.sub(/-\w\s.*/, "")
|
282
230
|
case abbrev
|
283
231
|
when "ITU"
|
284
232
|
name = "International Telecommunication Union"
|
285
233
|
url = "www.itu.int"
|
286
234
|
end
|
287
|
-
[{ entity: { name: name, url: url, abbreviation: abbrev },
|
235
|
+
[{ entity: { name: name, url: url, abbreviation: abbrev },
|
236
|
+
role: [type: "publisher"] }]
|
288
237
|
end
|
289
238
|
|
290
|
-
# Fetch ICS.
|
291
|
-
# @param doc [Nokogiri::HTML::Document]
|
292
|
-
# @return [Array<Hash>]
|
293
|
-
# def fetch_ics(doc)
|
294
|
-
# doc.xpath('//th[contains(text(), "ICS")]/following-sibling::td/a').map do |i|
|
295
|
-
# code = i.text.match(/[\d\.]+/).to_s.split '.'
|
296
|
-
# { field: code[0], group: code[1], subgroup: code[2] }
|
297
|
-
# end
|
298
|
-
# end
|
299
|
-
|
300
239
|
# Fetch links.
|
301
240
|
# @param doc [Nokogiri::HTML::Document]
|
302
241
|
# @param url [String]
|
303
242
|
# @return [Array<Hash>]
|
304
243
|
def fetch_link(doc, url)
|
305
244
|
links = [{ type: "src", content: url }]
|
306
|
-
|
307
|
-
|
245
|
+
obp_elm = doc.at(
|
246
|
+
'//a[@title="Persistent link to download the PDF file"]',
|
247
|
+
"//font[contains(.,'PDF')]/../..",
|
248
|
+
)
|
249
|
+
links << typed_link("obp", obp_elm) if obp_elm
|
250
|
+
wrd_elm = doc.at("//font[contains(.,'Word')]/../..")
|
251
|
+
links << typed_link("word", wrd_elm) if wrd_elm
|
308
252
|
links
|
309
253
|
end
|
310
254
|
|
255
|
+
def typed_link(type, elm)
|
256
|
+
{
|
257
|
+
type: type,
|
258
|
+
content: URI.join(HitCollection::DOMAIN + elm[:href].strip).to_s,
|
259
|
+
}
|
260
|
+
end
|
261
|
+
|
311
262
|
# Fetch copyright.
|
312
263
|
# @param code [String]
|
313
264
|
# @param doc [Nokogiri::HTML::Document]
|
314
|
-
# @return [Hash]
|
265
|
+
# @return [Array<Hash>]
|
315
266
|
def fetch_copyright(code, doc)
|
316
267
|
abbreviation = code.match(/^[^-]+/).to_s
|
317
268
|
case abbreviation
|
@@ -321,9 +272,9 @@ module RelatonItu
|
|
321
272
|
end
|
322
273
|
fdate = doc.at("//table/tr/td/span[contains(@id, 'Label5')]")
|
323
274
|
from = fdate&.text || ob_date(doc)
|
324
|
-
{ owner: { name: name, abbreviation: abbreviation, url: url },
|
275
|
+
[{ owner: [{ name: name, abbreviation: abbreviation, url: url }],
|
276
|
+
from: from }]
|
325
277
|
end
|
326
278
|
end
|
327
279
|
end
|
328
|
-
# rubocop:enable Metrics/ModuleLength
|
329
280
|
end
|