relaton-itu 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/grammars/biblio.rng +89 -32
- data/grammars/isodoc.rng +450 -4
- data/lib/relaton_itu.rb +0 -5
- data/lib/relaton_itu/editorial_group.rb +6 -4
- data/lib/relaton_itu/hash_converter.rb +10 -1
- data/lib/relaton_itu/hit.rb +2 -2
- data/lib/relaton_itu/hit_collection.rb +38 -12
- data/lib/relaton_itu/itu_bibliographic_item.rb +6 -5
- data/lib/relaton_itu/itu_bibliography.rb +24 -21
- data/lib/relaton_itu/scrapper.rb +54 -103
- data/lib/relaton_itu/structured_identifier.rb +41 -0
- data/lib/relaton_itu/version.rb +1 -1
- data/lib/relaton_itu/xml_parser.rb +18 -13
- data/relaton-itu.gemspec +2 -2
- metadata +6 -6
- data/grammars/isostandard.rng +0 -522
data/lib/relaton_itu.rb
CHANGED
@@ -2,11 +2,6 @@ require "relaton_itu/version"
|
|
2
2
|
require "relaton_itu/itu_bibliography"
|
3
3
|
require "digest/md5"
|
4
4
|
|
5
|
-
# if defined? Relaton
|
6
|
-
# require_relative "relaton/processor"
|
7
|
-
# Relaton::Registry.instance.register(Relaton::RelatonItu::Processor)
|
8
|
-
# end
|
9
|
-
|
10
5
|
module RelatonItu
|
11
6
|
class Error < StandardError; end
|
12
7
|
|
@@ -16,8 +16,9 @@ module RelatonItu
|
|
16
16
|
# @param subgroup [Hash, RelatonItu::ItuGroup, NilClass]
|
17
17
|
# @param workgroup [Hash, RelatonItu::ItuGroup, NilClass]
|
18
18
|
def initialize(bureau:, group:, subgroup: nil, workgroup: nil)
|
19
|
-
|
20
|
-
|
19
|
+
unless BUREAUS.include? bureau
|
20
|
+
warn "[relaton-itu] WARNING: invalid bureau: #{bureau}"
|
21
|
+
end
|
21
22
|
@bureau = bureau
|
22
23
|
@group = group.is_a?(Hash) ? ItuGroup.new(group) : group
|
23
24
|
@subgroup = subgroup.is_a?(Hash) ? ItuGroup.new(subgroup) : subgroup
|
@@ -28,7 +29,7 @@ module RelatonItu
|
|
28
29
|
def to_xml(builder)
|
29
30
|
builder.editorialgroup do
|
30
31
|
builder.bureau bureau
|
31
|
-
builder.group { |b| group.to_xml b }
|
32
|
+
builder.group { |b| group.to_xml b } if group
|
32
33
|
builder.subgroup { |b| group.to_xml b } if subgroup
|
33
34
|
builder.workgroup { |b| group.to_xml b } if workgroup
|
34
35
|
end
|
@@ -36,7 +37,8 @@ module RelatonItu
|
|
36
37
|
|
37
38
|
# @return [Hash]
|
38
39
|
def to_hash
|
39
|
-
hash = { "bureau" => bureau
|
40
|
+
hash = { "bureau" => bureau }
|
41
|
+
hash["group"] = group.to_hash if group
|
40
42
|
hash["subgroup"] = subgroup.to_hash if subgroup
|
41
43
|
hash["workgroup"] = workgroup.to_hash if workgroup
|
42
44
|
hash
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module RelatonItu
|
2
|
-
class HashConverter <
|
2
|
+
class HashConverter < RelatonBib::HashConverter
|
3
3
|
class << self
|
4
4
|
private
|
5
5
|
|
@@ -9,6 +9,15 @@ module RelatonItu
|
|
9
9
|
|
10
10
|
ret[:editorialgroup] = EditorialGroup.new eg
|
11
11
|
end
|
12
|
+
|
13
|
+
# @param ret [Hash]
|
14
|
+
def structuredidentifier_hash_to_bib(ret)
|
15
|
+
return unless ret[:structuredidentifier]
|
16
|
+
|
17
|
+
ret[:structuredidentifier] = StructuredIdentifier.new(
|
18
|
+
ret[:structuredidentifier],
|
19
|
+
)
|
20
|
+
end
|
12
21
|
end
|
13
22
|
end
|
14
23
|
end
|
data/lib/relaton_itu/hit.rb
CHANGED
@@ -4,9 +4,9 @@ module RelatonItu
|
|
4
4
|
# Hit.
|
5
5
|
class Hit < RelatonBib::Hit
|
6
6
|
# Parse page.
|
7
|
-
# @return [
|
7
|
+
# @return [RelatonItu::ItuBibliographicItem]
|
8
8
|
def fetch
|
9
|
-
@fetch ||= Scrapper.parse_page
|
9
|
+
@fetch ||= Scrapper.parse_page hit, hit_collection.gi_imp
|
10
10
|
end
|
11
11
|
end
|
12
12
|
end
|
@@ -7,16 +7,39 @@ require "net/http"
|
|
7
7
|
module RelatonItu
|
8
8
|
# Page of hit collection.
|
9
9
|
class HitCollection < RelatonBib::HitCollection
|
10
|
-
DOMAIN = "https://www.itu.int"
|
10
|
+
DOMAIN = "https://www.itu.int"
|
11
11
|
|
12
|
-
# @
|
12
|
+
# @return [TrueClass, FalseClass]
|
13
|
+
attr_reader :gi_imp
|
14
|
+
|
15
|
+
# @param ref [String]
|
13
16
|
# @param year [String]
|
14
|
-
def initialize(
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
def initialize(ref, year = nil)
|
18
|
+
text = ref.sub /(?<=\.)Imp\s?(?=\d)/, ""
|
19
|
+
super text, year
|
20
|
+
@gi_imp = /\.Imp\d/.match?(ref)
|
21
|
+
uri = URI "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
|
22
|
+
data = { json: params.to_json }
|
23
|
+
resp = Net::HTTP.post(uri, data.to_json,
|
24
|
+
"Content-Type" => "application/json")
|
25
|
+
@array = hits JSON.parse(resp.body)
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
# @return [String]
|
31
|
+
def group
|
32
|
+
@group ||= if %r{(OB|Operational Bulletin) No} =~ text then "Publications"
|
33
|
+
else "Recommendations"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# rubocop:disable Metrics/MethodLength
|
38
|
+
|
39
|
+
# @return [Hash]
|
40
|
+
def params
|
41
|
+
{
|
42
|
+
"Input" => text,
|
20
43
|
"Start" => 0,
|
21
44
|
"Rows" => 10,
|
22
45
|
"SortBy" => "RELEVANCE",
|
@@ -61,10 +84,13 @@ module RelatonItu
|
|
61
84
|
"IP" => "",
|
62
85
|
"SearchType" => "All",
|
63
86
|
}
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
87
|
+
end
|
88
|
+
# rubocop:enable Metrics/MethodLength
|
89
|
+
|
90
|
+
# @param data [Hash]
|
91
|
+
# @return [Array<RelatonItu::Hit>]
|
92
|
+
def hits(data)
|
93
|
+
data["results"].map do |h|
|
68
94
|
code = h["Media"]["Name"]
|
69
95
|
title = h["Title"]
|
70
96
|
url = h["Redirection"]
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module RelatonItu
|
2
|
-
class ItuBibliographicItem <
|
2
|
+
class ItuBibliographicItem < RelatonBib::BibliographicItem
|
3
3
|
TYPES = %w[
|
4
4
|
recommendation recommendation-supplement recommendation-amendment
|
5
5
|
recommendation-corrigendum recommendation-errata recommendation-annex
|
@@ -7,13 +7,14 @@ module RelatonItu
|
|
7
7
|
joint-itu-iso-iec
|
8
8
|
].freeze
|
9
9
|
|
10
|
+
# @params structuredidentifier [RelatonItu::StructuredIdentifier]
|
10
11
|
def initialize(**args)
|
11
|
-
@doctype = args.delete :doctype
|
12
|
-
if doctype && !TYPES.include?(doctype)
|
13
|
-
|
12
|
+
# @doctype = args.delete :doctype
|
13
|
+
if args[:doctype] && !TYPES.include?(args[:doctype])
|
14
|
+
warn "[relaton-itu] WARNING: invalid doctype: #{args[:doctype]}"
|
14
15
|
end
|
15
|
-
|
16
16
|
super
|
17
|
+
# @doctype = args[:doctype]
|
17
18
|
end
|
18
19
|
end
|
19
20
|
end
|
@@ -1,8 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "
|
3
|
+
require "relaton_bib"
|
4
4
|
require "relaton_itu/itu_bibliographic_item"
|
5
5
|
require "relaton_itu/editorial_group"
|
6
|
+
require "relaton_itu/structured_identifier"
|
6
7
|
require "relaton_itu/itu_group"
|
7
8
|
require "relaton_itu/scrapper"
|
8
9
|
require "relaton_itu/hit_collection"
|
@@ -19,9 +20,9 @@ module RelatonItu
|
|
19
20
|
# @return [RelatonItu::HitCollection]
|
20
21
|
def search(text, year = nil)
|
21
22
|
HitCollection.new text, year
|
22
|
-
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
23
|
-
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
24
|
-
OpenSSL::SSL::SSLError
|
23
|
+
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
24
|
+
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
25
|
+
Net::ProtocolError, OpenSSL::SSL::SSLError
|
25
26
|
raise RelatonBib::RequestError, "Could not access http://www.itu.int"
|
26
27
|
end
|
27
28
|
|
@@ -66,17 +67,17 @@ module RelatonItu
|
|
66
67
|
nil
|
67
68
|
end
|
68
69
|
|
69
|
-
def fetch_pages(hits, threads)
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
end
|
70
|
+
# def fetch_pages(hits, threads)
|
71
|
+
# workers = RelatonBib::WorkersPool.new threads
|
72
|
+
# workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
|
73
|
+
# hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
|
74
|
+
# workers.end
|
75
|
+
# workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
|
76
|
+
# end
|
76
77
|
|
77
78
|
def search_filter(code)
|
78
|
-
docidrx = %r{\w
|
79
|
-
c = code.match(docidrx).to_s
|
79
|
+
docidrx = %r{\w+\.\d+|\w\sSuppl\.\s\d+} # %r{^ITU-T\s[^\s]+}
|
80
|
+
c = code.sub(/Imp\s?/, "").match(docidrx).to_s
|
80
81
|
warn "[relaton-itu] (\"#{code}\") fetching..."
|
81
82
|
result = search(code)
|
82
83
|
result.select do |i|
|
@@ -93,16 +94,18 @@ module RelatonItu
|
|
93
94
|
# If no match, returns any years which caused mismatch, for error reporting
|
94
95
|
def isobib_results_filter(result, year)
|
95
96
|
missed_years = []
|
96
|
-
result.each_slice(3) do |s| # ISO website only allows 3 connections
|
97
|
-
|
98
|
-
|
97
|
+
# result.each_slice(3) do |s| # ISO website only allows 3 connections
|
98
|
+
# fetch_pages(s, 3).each do |r|
|
99
|
+
result.each do |r|
|
100
|
+
return { ret: r.fetch } if !year
|
99
101
|
|
100
|
-
|
101
|
-
|
102
|
+
/\(\d{2}\/(?<pyear>\d{4})\)/ =~ r.hit[:code]
|
103
|
+
# r.date.select { |d| d.type == "published" }.each do |d|
|
104
|
+
return { ret: r.fetch } if year == pyear
|
102
105
|
|
103
|
-
|
104
|
-
|
105
|
-
end
|
106
|
+
missed_years << pyear
|
107
|
+
# end
|
108
|
+
# end
|
106
109
|
end
|
107
110
|
{ years: missed_years }
|
108
111
|
end
|
data/lib/relaton_itu/scrapper.rb
CHANGED
@@ -3,16 +3,9 @@
|
|
3
3
|
require "nokogiri"
|
4
4
|
require "net/http"
|
5
5
|
|
6
|
-
# Capybara.request_driver :poltergeist do |app|
|
7
|
-
# Capybara::Poltergeist::Driver.new app, js_errors: false
|
8
|
-
# end
|
9
|
-
# Capybara.default_driver = :poltergeist
|
10
|
-
|
11
6
|
module RelatonItu
|
12
7
|
# Scrapper.
|
13
|
-
# rubocop:disable Metrics/ModuleLength
|
14
8
|
module Scrapper
|
15
|
-
DOMAIN = "https://www.itu.int"
|
16
9
|
ROMAN_MONTHS = %w[I II III IV V VI VII VIII IX X XI XII].freeze
|
17
10
|
|
18
11
|
TYPES = {
|
@@ -31,30 +24,26 @@ module RelatonItu
|
|
31
24
|
}.freeze
|
32
25
|
|
33
26
|
class << self
|
34
|
-
#
|
35
|
-
# @return [Array<Hash>]
|
36
|
-
# def get(text)
|
37
|
-
# iso_workers = WorkersPool.new 4
|
38
|
-
# iso_workers.worker { |hit| iso_worker(hit, iso_workers) }
|
39
|
-
# algolia_workers = start_algolia_search(text, iso_workers)
|
40
|
-
# iso_docs = iso_workers.result
|
41
|
-
# algolia_workers.end
|
42
|
-
# algolia_workers.result
|
43
|
-
# iso_docs
|
44
|
-
# end
|
27
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
45
28
|
|
46
29
|
# Parse page.
|
47
|
-
# @param
|
30
|
+
# @param hit_data [Hash]
|
48
31
|
# @return [Hash]
|
49
|
-
|
50
|
-
def parse_page(hit_data)
|
32
|
+
def parse_page(hit_data, imp = false)
|
51
33
|
url, doc = get_page hit_data[:url]
|
34
|
+
if imp
|
35
|
+
a = doc.at "//span[contains(@id, 'tab_ig_uc_rec')]/a"
|
36
|
+
return unless a
|
37
|
+
|
38
|
+
url, doc = get_page URI.join(url, a[:href]).to_s
|
39
|
+
end
|
52
40
|
|
53
41
|
# Fetch edition.
|
54
42
|
edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b")&.text
|
55
43
|
|
56
44
|
ItuBibliographicItem.new(
|
57
45
|
fetched: Date.today.to_s,
|
46
|
+
type: "standard",
|
58
47
|
docid: fetch_docid(doc),
|
59
48
|
edition: edition,
|
60
49
|
language: ["en"],
|
@@ -73,7 +62,7 @@ module RelatonItu
|
|
73
62
|
place: ["Geneva"],
|
74
63
|
)
|
75
64
|
end
|
76
|
-
# rubocop:enable Metrics/AbcSize
|
65
|
+
# rubocop:enable Metrics/AbcSize
|
77
66
|
|
78
67
|
private
|
79
68
|
|
@@ -96,37 +85,23 @@ module RelatonItu
|
|
96
85
|
}]
|
97
86
|
end
|
98
87
|
|
99
|
-
# Get langs.
|
100
|
-
# @param doc [Nokogiri::HTML::Document]
|
101
|
-
# @return [Array<Hash>]
|
102
|
-
# def langs(doc)
|
103
|
-
# lgs = [{ lang: 'en' }]
|
104
|
-
# doc.css('ul#lang-switcher ul li a').each do |lang_link|
|
105
|
-
# lang_path = lang_link.attr('href')
|
106
|
-
# lang = lang_path.match(%r{^\/(fr)\/})
|
107
|
-
# lgs << { lang: lang[1], path: lang_path } if lang
|
108
|
-
# end
|
109
|
-
# lgs
|
110
|
-
# end
|
111
|
-
|
112
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
113
88
|
# Get page.
|
114
89
|
# @param path [String] page's path
|
115
|
-
# @return [Array<Nokogiri::HTML::Document
|
90
|
+
# @return [Array<String, Nokogiri::HTML::Document>]
|
116
91
|
def get_page(url)
|
117
92
|
uri = URI url
|
118
|
-
resp = Net::HTTP.get_response(uri)
|
93
|
+
resp = Net::HTTP.get_response(uri)
|
119
94
|
until resp.code == "200"
|
120
95
|
uri = URI resp["location"] if resp.code =~ /^30/
|
121
|
-
resp = Net::HTTP.get_response(uri)
|
96
|
+
resp = Net::HTTP.get_response(uri)
|
122
97
|
end
|
123
98
|
[uri.to_s, Nokogiri::HTML(resp.body)]
|
124
|
-
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
125
|
-
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
126
|
-
OpenSSL::SSL::SSLError
|
99
|
+
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
100
|
+
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
101
|
+
Net::ProtocolError, OpenSSL::SSL::SSLError
|
127
102
|
raise RelatonBib::RequestError, "Could not access #{url}"
|
128
103
|
end
|
129
|
-
# rubocop:enable Metrics/
|
104
|
+
# rubocop:enable Metrics/MethodLength
|
130
105
|
|
131
106
|
# Fetch docid.
|
132
107
|
# @param doc [Nokogiri::HTML::Document]
|
@@ -135,9 +110,11 @@ module RelatonItu
|
|
135
110
|
doc.xpath(
|
136
111
|
"//span[@id='ctl00_content_main_uc_rec_main_info1_rpt_main_ctl00_lbl_rec']",
|
137
112
|
"//td[.='Identical standard:']/following-sibling::td",
|
113
|
+
"//div/table[1]/tr[4]/td/strong",
|
138
114
|
).map do |code|
|
139
|
-
id = code.text.match(%r{^.*?(?= \()}).to_s.squeeze(" ")
|
115
|
+
id = code.text.match(%r{^.*?(?= \()|\w\.Imp\s?\d+}).to_s.squeeze(" ")
|
140
116
|
type = id.match(%r{^\w+}).to_s
|
117
|
+
type = "ITU" if type == "G"
|
141
118
|
RelatonBib::DocumentIdentifier.new(type: type, id: id)
|
142
119
|
end
|
143
120
|
end
|
@@ -146,10 +123,11 @@ module RelatonItu
|
|
146
123
|
# @param doc [Nokogiri::HTML::Document]
|
147
124
|
# @return [RelatonBib::DocumentStatus, NilClass]
|
148
125
|
def fetch_status(doc)
|
149
|
-
s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]"
|
126
|
+
s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]",
|
127
|
+
"//p[contains(.,'Status :')]")
|
150
128
|
return unless s
|
151
129
|
|
152
|
-
status = s.text
|
130
|
+
status = s.text.include?("In force") ? "Published" : "Withdrawal"
|
153
131
|
RelatonBib::DocumentStatus.new(stage: status)
|
154
132
|
end
|
155
133
|
|
@@ -191,55 +169,22 @@ module RelatonItu
|
|
191
169
|
# @return [Array<Hash>]
|
192
170
|
def fetch_relations(doc)
|
193
171
|
doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]').map do |r|
|
194
|
-
# r_type = r.at('./td/span[contains(@id, "Label4")]/nobr').text.downcase
|
195
172
|
ref = r.at('./td/span[contains(@id, "title_e")]/nobr/a')
|
196
|
-
# url = DOMAIN + ref[:href].sub(/^\./, "/ITU-T/recommendations")
|
197
173
|
fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en", script: "Latn")
|
198
|
-
bibitem =
|
174
|
+
bibitem = ItuBibliographicItem.new(formattedref: fref, type: "standard")
|
199
175
|
{ type: "complements", bibitem: bibitem }
|
200
176
|
end
|
201
177
|
end
|
202
178
|
# rubocop:enable Metrics/MethodLength
|
203
179
|
|
204
|
-
# Fetch type.
|
205
|
-
# @param doc [Nokogiri::HTML::Document]
|
206
|
-
# @return [String]
|
207
|
-
# def fetch_type(_doc)
|
208
|
-
# "recommendation"
|
209
|
-
# end
|
210
|
-
|
211
180
|
# Fetch titles.
|
212
181
|
# @param doc [Nokogiri::HTML::Document]
|
213
182
|
# @return [Array<Hash>]
|
214
183
|
def fetch_titles(doc)
|
215
|
-
|
216
|
-
# t = hit_data[:title] if t.empty?
|
217
|
-
t = doc.at("//td[@class='title']")
|
184
|
+
t = doc.at("//td[@class='title']|//div/table[1]/tr[4]/td/strong")
|
218
185
|
return [] unless t
|
219
|
-
|
220
|
-
|
221
|
-
when 0
|
222
|
-
intro, main, part = nil, "", nil
|
223
|
-
when 1
|
224
|
-
intro, main, part = nil, titles[0], nil
|
225
|
-
when 2
|
226
|
-
if /^(Part|Partie) \d+:/ =~ titles[1]
|
227
|
-
intro, main, part = nil, titles[0], titles[1]
|
228
|
-
else
|
229
|
-
intro, main, part = titles[0], titles[1], nil
|
230
|
-
end
|
231
|
-
when 3
|
232
|
-
intro, main, part = titles[0], titles[1], titles[2]
|
233
|
-
else
|
234
|
-
intro, main, part = titles[0], titles[1], titles[2..-1]&.join(" -- ")
|
235
|
-
end
|
236
|
-
[{
|
237
|
-
title_intro: intro,
|
238
|
-
title_main: main,
|
239
|
-
title_part: part,
|
240
|
-
language: "en",
|
241
|
-
script: "Latn",
|
242
|
-
}]
|
186
|
+
|
187
|
+
RelatonBib::TypedTitleString.from_string t.text, "en", "Latn"
|
243
188
|
end
|
244
189
|
|
245
190
|
# Fetch dates
|
@@ -247,10 +192,11 @@ module RelatonItu
|
|
247
192
|
# @return [Array<Hash>]
|
248
193
|
def fetch_dates(doc)
|
249
194
|
dates = []
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
195
|
+
date = doc.at("//table/tr/td/span[contains(@id, 'Label5')]",
|
196
|
+
"//p[contains(.,'Approved in')]")
|
197
|
+
pdate = date&.text&.match(/\d{4}-\d{2}-\d{2}/).to_s || ob_date(doc)
|
198
|
+
if pdate && !pdate&.empty?
|
199
|
+
dates << { type: "published", on: pdate }
|
254
200
|
end
|
255
201
|
dates
|
256
202
|
end
|
@@ -278,40 +224,45 @@ module RelatonItu
|
|
278
224
|
# @param doc [Nokogiri::HTML::Document]
|
279
225
|
# @return [Array<Hash>]
|
280
226
|
def fetch_contributors(code)
|
227
|
+
return [] unless code
|
228
|
+
|
281
229
|
abbrev = code.sub(/-\w\s.*/, "")
|
282
230
|
case abbrev
|
283
231
|
when "ITU"
|
284
232
|
name = "International Telecommunication Union"
|
285
233
|
url = "www.itu.int"
|
286
234
|
end
|
287
|
-
[{ entity: { name: name, url: url, abbreviation: abbrev },
|
235
|
+
[{ entity: { name: name, url: url, abbreviation: abbrev },
|
236
|
+
role: [type: "publisher"] }]
|
288
237
|
end
|
289
238
|
|
290
|
-
# Fetch ICS.
|
291
|
-
# @param doc [Nokogiri::HTML::Document]
|
292
|
-
# @return [Array<Hash>]
|
293
|
-
# def fetch_ics(doc)
|
294
|
-
# doc.xpath('//th[contains(text(), "ICS")]/following-sibling::td/a').map do |i|
|
295
|
-
# code = i.text.match(/[\d\.]+/).to_s.split '.'
|
296
|
-
# { field: code[0], group: code[1], subgroup: code[2] }
|
297
|
-
# end
|
298
|
-
# end
|
299
|
-
|
300
239
|
# Fetch links.
|
301
240
|
# @param doc [Nokogiri::HTML::Document]
|
302
241
|
# @param url [String]
|
303
242
|
# @return [Array<Hash>]
|
304
243
|
def fetch_link(doc, url)
|
305
244
|
links = [{ type: "src", content: url }]
|
306
|
-
|
307
|
-
|
245
|
+
obp_elm = doc.at(
|
246
|
+
'//a[@title="Persistent link to download the PDF file"]',
|
247
|
+
"//font[contains(.,'PDF')]/../..",
|
248
|
+
)
|
249
|
+
links << typed_link("obp", obp_elm) if obp_elm
|
250
|
+
wrd_elm = doc.at("//font[contains(.,'Word')]/../..")
|
251
|
+
links << typed_link("word", wrd_elm) if wrd_elm
|
308
252
|
links
|
309
253
|
end
|
310
254
|
|
255
|
+
def typed_link(type, elm)
|
256
|
+
{
|
257
|
+
type: type,
|
258
|
+
content: URI.join(HitCollection::DOMAIN + elm[:href].strip).to_s,
|
259
|
+
}
|
260
|
+
end
|
261
|
+
|
311
262
|
# Fetch copyright.
|
312
263
|
# @param code [String]
|
313
264
|
# @param doc [Nokogiri::HTML::Document]
|
314
|
-
# @return [Hash]
|
265
|
+
# @return [Array<Hash>]
|
315
266
|
def fetch_copyright(code, doc)
|
316
267
|
abbreviation = code.match(/^[^-]+/).to_s
|
317
268
|
case abbreviation
|
@@ -321,9 +272,9 @@ module RelatonItu
|
|
321
272
|
end
|
322
273
|
fdate = doc.at("//table/tr/td/span[contains(@id, 'Label5')]")
|
323
274
|
from = fdate&.text || ob_date(doc)
|
324
|
-
{ owner: { name: name, abbreviation: abbreviation, url: url },
|
275
|
+
[{ owner: [{ name: name, abbreviation: abbreviation, url: url }],
|
276
|
+
from: from }]
|
325
277
|
end
|
326
278
|
end
|
327
279
|
end
|
328
|
-
# rubocop:enable Metrics/ModuleLength
|
329
280
|
end
|