relaton-itu 1.7.3 → 1.7.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +46 -0
- data/README.adoc +24 -5
- data/bin/rspec +29 -0
- data/lib/relaton_itu.rb +1 -0
- data/lib/relaton_itu/editorial_group.rb +1 -1
- data/lib/relaton_itu/hash_converter.rb +11 -2
- data/lib/relaton_itu/hit.rb +3 -1
- data/lib/relaton_itu/hit_collection.rb +33 -10
- data/lib/relaton_itu/itu_bibliographic_item.rb +7 -0
- data/lib/relaton_itu/itu_bibliography.rb +36 -19
- data/lib/relaton_itu/itu_group.rb +1 -1
- data/lib/relaton_itu/processor.rb +1 -2
- data/lib/relaton_itu/scrapper.rb +71 -66
- data/lib/relaton_itu/version.rb +1 -1
- data/lib/relaton_itu/xml_parser.rb +1 -1
- data/relaton-itu.gemspec +4 -3
- metadata +20 -35
- data/.github/workflows/macos.yml +0 -34
- data/.github/workflows/ubuntu.yml +0 -33
- data/.github/workflows/windows.yml +0 -35
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 249fe896ec8a77979ca15d6a42da98ad2ac2620cfe8dc0f468cd14277c5a35b0
|
4
|
+
data.tar.gz: 62415ed835abc49cf00d3048b52556b3f718a4ad0dc531ec2c20572b95305210
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 65a5bcf91f851cc4ec3139fad83b0c83f143b1bafefe8c9638e34072f7d82f76b77517fedf153fcdf7b63ee903fcd372c64e378edaeb231835f477f004f7e94a
|
7
|
+
data.tar.gz: 61cdc7df34b24f5d3f3e56b967e9e2b34337bc164b32691747853660cbb6c9e337a6eee058162f97e218da356f4028d7c107b4be4ed7dc10f99253e6985fccd9
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
+
# See https://github.com/metanorma/cimas
|
3
|
+
name: rake
|
4
|
+
|
5
|
+
on:
|
6
|
+
push:
|
7
|
+
branches: [ master, main ]
|
8
|
+
tags: [ v* ]
|
9
|
+
pull_request:
|
10
|
+
|
11
|
+
jobs:
|
12
|
+
rake:
|
13
|
+
name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }}
|
14
|
+
runs-on: ${{ matrix.os }}
|
15
|
+
continue-on-error: ${{ matrix.experimental }}
|
16
|
+
strategy:
|
17
|
+
fail-fast: false
|
18
|
+
matrix:
|
19
|
+
ruby: [ '2.7', '2.6', '2.5', '2.4' ]
|
20
|
+
os: [ ubuntu-latest, windows-latest, macos-latest ]
|
21
|
+
experimental: [ false ]
|
22
|
+
include:
|
23
|
+
- ruby: '3.0'
|
24
|
+
os: 'ubuntu-latest'
|
25
|
+
experimental: true
|
26
|
+
- ruby: '3.0'
|
27
|
+
os: 'windows-latest'
|
28
|
+
experimental: true
|
29
|
+
- ruby: '3.0'
|
30
|
+
os: 'macos-latest'
|
31
|
+
experimental: true
|
32
|
+
steps:
|
33
|
+
- uses: actions/checkout@v2
|
34
|
+
with:
|
35
|
+
submodules: true
|
36
|
+
|
37
|
+
# https://github.com/ruby-debug/debase/issues/89#issuecomment-686827382
|
38
|
+
- if: matrix.os == 'macos-latest' && matrix.ruby == '2.5'
|
39
|
+
run: echo BUNDLE_BUILD__DEBASE="--with-cflags=\"-Wno-error=implicit-function-declaration\"" >> $GITHUB_ENV
|
40
|
+
|
41
|
+
- uses: ruby/setup-ruby@v1
|
42
|
+
with:
|
43
|
+
ruby-version: ${{ matrix.ruby }}
|
44
|
+
bundler-cache: true
|
45
|
+
|
46
|
+
- run: bundle exec rake
|
data/README.adoc
CHANGED
@@ -97,6 +97,29 @@ RelatonItu::ItuBibliography.get "ITU-T G.989.2/Amd 1"
|
|
97
97
|
...
|
98
98
|
----
|
99
99
|
|
100
|
+
=== Get ITU-R documents
|
101
|
+
|
102
|
+
[source,ruby]
|
103
|
+
----
|
104
|
+
RelatonItu::ItuBibliography.get "ITU-R REC-BO.600-1"
|
105
|
+
[relaton-itu] ("ITU-R REC-BO.600-1") fetching...
|
106
|
+
[relaton-itu] ("ITU-R REC-BO.600-1") found R-REC-BO.600-1
|
107
|
+
=> #<RelatonItu::ItuBibliographicItem:0x007f84e9930498
|
108
|
+
...
|
109
|
+
----
|
110
|
+
|
111
|
+
=== Get ITU-R Radio Regulations (RR)
|
112
|
+
|
113
|
+
[source,ruby]
|
114
|
+
----
|
115
|
+
RelatonItu::ItuBibliography.get 'ITU-R RR (2020)'
|
116
|
+
[relaton-itu] ("ITU-R RR") fetching...
|
117
|
+
[relaton-itu] WARNING: invalid doctype: publication
|
118
|
+
[relaton-itu] ("ITU-R RR") found ITU-R RR
|
119
|
+
=> #<RelatonItu::ItuBibliographicItem:0x007fb4b55eb660
|
120
|
+
...
|
121
|
+
----
|
122
|
+
|
100
123
|
=== Create bibliographic item from YAML
|
101
124
|
[source,ruby]
|
102
125
|
----
|
@@ -104,11 +127,7 @@ hash = YAML.load_file 'spec/examples/itu_bib_item.yml'
|
|
104
127
|
=> {"id"=>"ITU-T L.163 (11/2018)",
|
105
128
|
...
|
106
129
|
|
107
|
-
|
108
|
-
=> {:id=>"ITU-T L.163 (11/2018)",
|
109
|
-
...
|
110
|
-
|
111
|
-
RelatonItu::ItuBibliographicItem.new bib_hash
|
130
|
+
RelatonItu::ItuBibliographicItem.from_hash hash
|
112
131
|
=> #<RelatonItu::ItuBibliographicItem:0x007fd88ac02aa0
|
113
132
|
...
|
114
133
|
----
|
data/bin/rspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'rspec' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("rspec-core", "rspec")
|
data/lib/relaton_itu.rb
CHANGED
@@ -20,7 +20,7 @@ module RelatonItu
|
|
20
20
|
warn "[relaton-itu] WARNING: invalid bureau: #{bureau}"
|
21
21
|
end
|
22
22
|
@bureau = bureau
|
23
|
-
@group = group.is_a?(Hash) ? ItuGroup.new(group) : group
|
23
|
+
@group = group.is_a?(Hash) ? ItuGroup.new(**group) : group
|
24
24
|
@subgroup = subgroup.is_a?(Hash) ? ItuGroup.new(subgroup) : subgroup
|
25
25
|
@workgroup = workgroup.is_a?(Hash) ? ItuGroup.new(workgroup) : workgroup
|
26
26
|
end
|
@@ -3,11 +3,20 @@ module RelatonItu
|
|
3
3
|
class << self
|
4
4
|
private
|
5
5
|
|
6
|
+
#
|
7
|
+
# Overrides superclass's method
|
8
|
+
#
|
9
|
+
# @param item [Hash]
|
10
|
+
# @return [RelatonItu::ItuBibliographicItem]
|
11
|
+
def bib_item(item)
|
12
|
+
ItuBibliographicItem.new(**item)
|
13
|
+
end
|
14
|
+
|
6
15
|
def editorialgroup_hash_to_bib(ret)
|
7
16
|
eg = ret[:editorialgroup]
|
8
17
|
return unless eg
|
9
18
|
|
10
|
-
ret[:editorialgroup] = EditorialGroup.new eg
|
19
|
+
ret[:editorialgroup] = EditorialGroup.new **eg
|
11
20
|
end
|
12
21
|
|
13
22
|
# @param ret [Hash]
|
@@ -15,7 +24,7 @@ module RelatonItu
|
|
15
24
|
return unless ret[:structuredidentifier]
|
16
25
|
|
17
26
|
ret[:structuredidentifier] = StructuredIdentifier.new(
|
18
|
-
ret[:structuredidentifier]
|
27
|
+
**ret[:structuredidentifier]
|
19
28
|
)
|
20
29
|
end
|
21
30
|
end
|
data/lib/relaton_itu/hit.rb
CHANGED
@@ -3,10 +3,12 @@
|
|
3
3
|
module RelatonItu
|
4
4
|
# Hit.
|
5
5
|
class Hit < RelatonBib::Hit
|
6
|
+
attr_writer :fetch
|
7
|
+
|
6
8
|
# Parse page.
|
7
9
|
# @return [RelatonItu::ItuBibliographicItem]
|
8
10
|
def fetch
|
9
|
-
@fetch ||= Scrapper.parse_page
|
11
|
+
@fetch ||= Scrapper.parse_page self, hit_collection.gi_imp
|
10
12
|
end
|
11
13
|
end
|
12
14
|
end
|
@@ -12,25 +12,48 @@ module RelatonItu
|
|
12
12
|
# @return [TrueClass, FalseClass]
|
13
13
|
attr_reader :gi_imp
|
14
14
|
|
15
|
+
# @return [Mechanize]
|
16
|
+
attr_reader :agent
|
17
|
+
|
15
18
|
# @param ref [String]
|
16
19
|
# @param year [String]
|
17
|
-
def initialize(ref, year = nil)
|
20
|
+
def initialize(ref, year = nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
18
21
|
text = ref.sub /(?<=\.)Imp\s?(?=\d)/, ""
|
19
22
|
super text, year
|
23
|
+
@agent = Mechanize.new
|
24
|
+
agent.user_agent_alias = "Mac Safari"
|
20
25
|
@gi_imp = /\.Imp\d/.match?(ref)
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
+
if ref.match? /^(ITU-T|ITU-R\sRR)/
|
27
|
+
url = "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
|
28
|
+
data = { json: params.to_json }
|
29
|
+
resp = agent.post url, data.to_json, "Content-Type" => "application/json"
|
30
|
+
@array = hits JSON.parse(resp.body)
|
31
|
+
elsif ref.match? /^ITU-R/
|
32
|
+
rf = ref.sub(/^ITU-R\s/, "").upcase
|
33
|
+
url = "https://raw.githubusercontent.com/relaton/relaton-data-itu-r/master/data/#{rf}.yaml"
|
34
|
+
resp = Net::HTTP.get_response(URI(url))
|
35
|
+
if resp.code == "404"
|
36
|
+
@array = []
|
37
|
+
return
|
38
|
+
end
|
39
|
+
|
40
|
+
hash = YAML.safe_load resp.body
|
41
|
+
item_hash = HashConverter.hash_to_bib(hash)
|
42
|
+
item = ItuBibliographicItem.new **item_hash
|
43
|
+
hit = Hit.new({ url: url }, self)
|
44
|
+
hit.fetch = item
|
45
|
+
@array = [hit]
|
46
|
+
end
|
26
47
|
end
|
27
48
|
|
28
49
|
private
|
29
50
|
|
30
51
|
# @return [String]
|
31
52
|
def group
|
32
|
-
@group ||=
|
33
|
-
|
53
|
+
@group ||= case text
|
54
|
+
when %r{OB|Operational Bulletin}, %r{^ITU-R\sRR}
|
55
|
+
"Publications"
|
56
|
+
when %r{^ITU-T} then "Recommendations"
|
34
57
|
end
|
35
58
|
end
|
36
59
|
|
@@ -46,7 +69,7 @@ module RelatonItu
|
|
46
69
|
"ExactPhrase" => false,
|
47
70
|
"CollectionName" => "General",
|
48
71
|
"CollectionGroup" => group,
|
49
|
-
"Sector" =>
|
72
|
+
"Sector" => text.match(/(?<=^ITU-)\w/).to_s.downcase,
|
50
73
|
"Criterias" => [{
|
51
74
|
"Name" => "Search in",
|
52
75
|
"Criterias" => [
|
@@ -93,7 +116,7 @@ module RelatonItu
|
|
93
116
|
code = h["Media"]["Name"]
|
94
117
|
title = h["Title"]
|
95
118
|
url = h["Redirection"]
|
96
|
-
type =
|
119
|
+
type = h["Collection"]["Group"].downcase[0...-1]
|
97
120
|
Hit.new({ code: code, title: title, url: url, type: type }, self)
|
98
121
|
end
|
99
122
|
end
|
@@ -19,20 +19,27 @@ module RelatonItu
|
|
19
19
|
# @param text [String]
|
20
20
|
# @return [RelatonItu::HitCollection]
|
21
21
|
def search(text, year = nil)
|
22
|
+
# code = text.sub(/(?<=ITU-T\s\w)\.(\w+\.)(?=\d+)/, ' \1')
|
23
|
+
if text =~ /(ITU-T\s\w)\.(Suppl\.|Annex)\s?(\w?\d+)/
|
24
|
+
correct_ref = "#{$~[1]} #{$~[2]} #{$~[3]}"
|
25
|
+
warn "[relaton-itu] WARNING: Incorrect reference #{text}"
|
26
|
+
warn "[relaton-itu] the reference should be #{correct_ref}"
|
27
|
+
end
|
22
28
|
HitCollection.new text, year
|
23
29
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
24
30
|
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
25
|
-
Net::ProtocolError,
|
26
|
-
raise RelatonBib::RequestError,
|
31
|
+
Net::ProtocolError, URI::InvalidURIError => e
|
32
|
+
raise RelatonBib::RequestError, e.message
|
27
33
|
end
|
28
34
|
|
29
35
|
# @param code [String] the ISO standard Code to look up (e.g. "ISO 9000")
|
30
36
|
# @param year [String] the year the standard was published (optional)
|
31
|
-
# @param opts [Hash] options; restricted to :all_parts if all-parts
|
37
|
+
# @param opts [Hash] options; restricted to :all_parts if all-parts
|
38
|
+
# reference is required
|
32
39
|
# @return [String] Relaton XML serialisation of reference
|
33
40
|
def get(code, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
34
41
|
if year.nil?
|
35
|
-
/^(?<code1>[^\s]+\s[^\s]+)\s\(\d{2}\/(?<year1>\d+)\)$/ =~ code
|
42
|
+
/^(?<code1>[^\s]+\s[^\s]+)\s\((\d{2}\/)?(?<year1>\d+)\)$/ =~ code
|
36
43
|
unless code1.nil?
|
37
44
|
code = code1
|
38
45
|
year = year1
|
@@ -54,11 +61,13 @@ module RelatonItu
|
|
54
61
|
id = year ? "#{code}:#{year}" : code
|
55
62
|
warn "[relaton-itu] WARNING: no match found online for #{id}. "\
|
56
63
|
"The code must be exactly like it is on the standards website."
|
57
|
-
|
58
|
-
"
|
59
|
-
|
60
|
-
|
61
|
-
|
64
|
+
unless missed_years.empty?
|
65
|
+
warn "[relaton-itu] (There was no match for #{year}, though there "\
|
66
|
+
"were matches found for #{missed_years.join(', ')}.)"
|
67
|
+
end
|
68
|
+
if /\d-\d/.match? code
|
69
|
+
warn "[relaton-itu] The provided document part may not exist, or "\
|
70
|
+
"the document may no longer be published in parts."
|
62
71
|
else
|
63
72
|
warn "[relaton-itu] If you wanted to cite all document parts for the reference, "\
|
64
73
|
"use \"#{code} (all parts)\".\nIf the document is not a standard, "\
|
@@ -69,10 +78,11 @@ module RelatonItu
|
|
69
78
|
|
70
79
|
def search_filter(code, year) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
71
80
|
%r{
|
72
|
-
^(?<pref1>ITU)?(-(?<type1>\w))?\s?(?<code1>[^\s\/]+)
|
81
|
+
^(?<pref1>ITU)?(-(?<type1>\w))?\s?(?<code1>[^\s\/]+(?:\/\w[\.\d]+)?)
|
82
|
+
(\s\(?(?<ver1>v\d+)\)?)?
|
73
83
|
(\s\(((?<month1>\d{2})\/)?(?<year1>\d{4})\))?
|
74
84
|
(\s-\s(?<buldate1>\d{2}\.\w{1,4}\.\d{4}))?
|
75
|
-
(
|
85
|
+
(\s(?<corr1>(Amd|Cor|Amendment|Corrigendum)\.?\s?\d+))?
|
76
86
|
(\s\(((?<cormonth1>\d{2})\/)?(?<coryear1>\d{4})\))?
|
77
87
|
}x =~ code
|
78
88
|
year ||= year1
|
@@ -81,21 +91,27 @@ module RelatonItu
|
|
81
91
|
warn "[relaton-itu] (\"#{code}\") fetching..."
|
82
92
|
result = search(code)
|
83
93
|
code1.sub! /(?<=\.)Imp(?=\d)/, "" if result.gi_imp
|
94
|
+
if corr1
|
95
|
+
corr1.sub!(/[\.\s]+/, " ").sub!("Amendment", "Amd")
|
96
|
+
corr1.sub!("Corrigendum", "Corr")
|
97
|
+
end
|
84
98
|
result.select do |i|
|
99
|
+
next true unless i.hit[:code]
|
100
|
+
|
85
101
|
%r{
|
86
102
|
^(?<pref2>ITU)?(-(?<type2>\w))?\s?(?<code2>[\S]+)
|
103
|
+
(\s\(?(?<ver2>v\d+)\)?)?
|
87
104
|
(\s\(((?<month2>\d{2})\/)?(?<year2>\d{4})\))?
|
88
105
|
(\s(?<corr2>(Amd|Cor)\.\s?\d+))?
|
89
106
|
(\s\(((?<cormonth2>\d{2})\/)?(?<coryear2>\d{4})\))?
|
90
107
|
}x =~ i.hit[:code]
|
91
108
|
/:[^\(]+\((?<buldate2>\d{2}\.\w{1,4}\.\d{4})\)/ =~ i.hit[:title]
|
92
109
|
corr2&.sub! /\.\s?/, " "
|
93
|
-
pref1 == pref2 && (!type1 || type1 == type2) && code1
|
110
|
+
pref1 == pref2 && (!type1 || type1 == type2) && code2.include?(code1) &&
|
94
111
|
(!year || year == year2) && (!month1 || month1 == month2) &&
|
95
112
|
corr1 == corr2 && (!coryear1 || coryear1 == coryear2) &&
|
96
|
-
buldate1 == buldate2 && (!cormonth1 || cormonth1 == cormonth2)
|
97
|
-
|
98
|
-
# i.hit[:code].match(docidrx).to_s == c
|
113
|
+
buldate1 == buldate2 && (!cormonth1 || cormonth1 == cormonth2) &&
|
114
|
+
(!ver1 || ver1 == ver2)
|
99
115
|
end
|
100
116
|
end
|
101
117
|
|
@@ -108,10 +124,11 @@ module RelatonItu
|
|
108
124
|
def isobib_results_filter(result, year)
|
109
125
|
missed_years = []
|
110
126
|
result.each do |r|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
127
|
+
/\((\d{2}\/)?(?<pyear>\d{4})\)/ =~ r.hit[:code]
|
128
|
+
if !year || year == pyear
|
129
|
+
ret = r.fetch
|
130
|
+
return { ret: ret } if ret
|
131
|
+
end
|
115
132
|
|
116
133
|
missed_years << pyear
|
117
134
|
end
|
@@ -26,8 +26,7 @@ module RelatonItu
|
|
26
26
|
# @param hash [Hash]
|
27
27
|
# @return [RelatonItu::ItuBibliographicItem]
|
28
28
|
def hash_to_bib(hash)
|
29
|
-
|
30
|
-
::RelatonItu::ItuBibliographicItem.new item_hash
|
29
|
+
::RelatonItu::ItuBibliographicItem.from_hash hash
|
31
30
|
end
|
32
31
|
|
33
32
|
# Returns hash of XML grammar
|
data/lib/relaton_itu/scrapper.rb
CHANGED
@@ -24,18 +24,18 @@ module RelatonItu
|
|
24
24
|
}.freeze
|
25
25
|
|
26
26
|
class << self
|
27
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
28
|
-
|
29
27
|
# Parse page.
|
30
|
-
# @param
|
28
|
+
# @param hit [RelatonItu::Hit]
|
31
29
|
# @return [RelatonItu::ItuBibliographicItem, NilClass]
|
32
|
-
def parse_page(
|
33
|
-
|
30
|
+
def parse_page(hit, imp = false) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
31
|
+
doc = get_page hit
|
32
|
+
return unless doc.code == "200"
|
33
|
+
|
34
34
|
if imp
|
35
35
|
a = doc.at "//span[contains(@id, 'tab_ig_uc_rec')]/a"
|
36
36
|
return unless a
|
37
37
|
|
38
|
-
|
38
|
+
doc = get_page hit, a[:href].to_s
|
39
39
|
end
|
40
40
|
|
41
41
|
# Fetch edition.
|
@@ -44,67 +44,65 @@ module RelatonItu
|
|
44
44
|
ItuBibliographicItem.new(
|
45
45
|
fetched: Date.today.to_s,
|
46
46
|
type: "standard",
|
47
|
-
docid: fetch_docid(doc,
|
47
|
+
docid: fetch_docid(doc, hit.hit[:title]),
|
48
48
|
edition: edition,
|
49
49
|
language: ["en"],
|
50
50
|
script: ["Latn"],
|
51
51
|
title: fetch_titles(doc),
|
52
|
-
doctype:
|
52
|
+
doctype: hit.hit[:type],
|
53
53
|
docstatus: fetch_status(doc),
|
54
54
|
ics: [], # fetch_ics(doc),
|
55
55
|
date: fetch_dates(doc),
|
56
|
-
contributor: fetch_contributors(
|
57
|
-
editorialgroup: fetch_workgroup(
|
58
|
-
abstract: fetch_abstract(doc),
|
59
|
-
copyright: fetch_copyright(
|
60
|
-
link: fetch_link(doc
|
56
|
+
contributor: fetch_contributors(hit.hit[:code]),
|
57
|
+
editorialgroup: fetch_workgroup(hit.hit[:code], doc),
|
58
|
+
abstract: fetch_abstract(doc, hit),
|
59
|
+
copyright: fetch_copyright(hit.hit[:code], doc),
|
60
|
+
link: fetch_link(doc),
|
61
61
|
relation: fetch_relations(doc),
|
62
62
|
place: ["Geneva"]
|
63
63
|
)
|
64
64
|
end
|
65
|
-
# rubocop:enable Metrics/AbcSize
|
66
65
|
|
67
66
|
private
|
68
67
|
|
69
68
|
# Fetch abstracts.
|
70
|
-
# @param doc [
|
71
|
-
# @
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
69
|
+
# @param doc [Mechanize::Page]
|
70
|
+
# @param hit [RelatonItu::Hit]
|
71
|
+
# @return [Array<Hash>]
|
72
|
+
def fetch_abstract(doc, hit) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
73
|
+
abstract_url = doc.at '//table/tr/td/span[contains(@id, "lbl_dms")]/div'
|
74
|
+
content = if abstract_url
|
75
|
+
url = abstract_url[:onclick].match(/https?[^']+/).to_s
|
76
|
+
rsp = hit.hit_collection.agent.get url
|
77
|
+
d = Nokogiri::HTML rsp.body.encode(undef: :replace, replace: "")
|
78
|
+
d.css("p.MsoNormal").text.gsub(/\r\n/, "").squeeze(" ").gsub(/\u00a0/, "")
|
79
|
+
elsif a = doc.at('//table/tr/td/span[contains(@class, "observation")]/text()')
|
80
|
+
a.text.strip
|
81
|
+
end
|
82
|
+
return [] unless content
|
80
83
|
|
81
84
|
[{
|
82
|
-
content:
|
85
|
+
content: content,
|
83
86
|
language: "en",
|
84
87
|
script: "Latn",
|
85
88
|
}]
|
86
89
|
end
|
87
90
|
|
88
91
|
# Get page.
|
89
|
-
# @param
|
92
|
+
# @param hit [RelatonItu::Hit]
|
93
|
+
# @param url [String, nil]
|
90
94
|
# @return [Mechanize::Page]
|
91
|
-
def get_page(url)
|
92
|
-
uri =
|
93
|
-
|
94
|
-
until resp.code == "200"
|
95
|
-
uri = URI resp["location"] if resp.code.match? /^30/
|
96
|
-
resp = Net::HTTP.get_response(uri)
|
97
|
-
end
|
98
|
-
[uri.to_s, Nokogiri::HTML(resp.body)]
|
95
|
+
def get_page(hit, url = nil)
|
96
|
+
uri = url || hit.hit[:url]
|
97
|
+
hit.hit_collection.agent.get uri
|
99
98
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
100
99
|
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
101
100
|
Net::ProtocolError, OpenSSL::SSL::SSLError
|
102
|
-
raise RelatonBib::RequestError, "Could not access #{
|
101
|
+
raise RelatonBib::RequestError, "Could not access #{uri}"
|
103
102
|
end
|
104
|
-
# rubocop:enable Metrics/MethodLength
|
105
103
|
|
106
104
|
# Fetch docid.
|
107
|
-
# @param doc [
|
105
|
+
# @param doc [Mechanize::Page]
|
108
106
|
# @param title [String]
|
109
107
|
# @return [Hash]
|
110
108
|
def fetch_docid(doc, title)
|
@@ -117,16 +115,18 @@ module RelatonItu
|
|
117
115
|
docids
|
118
116
|
end
|
119
117
|
|
120
|
-
|
118
|
+
# @param text [String]
|
119
|
+
# @return [RelatonBib::DocumentIdentifier]
|
120
|
+
def createdocid(text) # rubocop:disable Metrics/MethodLength
|
121
121
|
%r{
|
122
122
|
^(?<code>((ITU-\w|ISO\/IEC)\s)?[^\(:]+)
|
123
|
-
(\(((?<
|
123
|
+
(\(((?<_month>\d{2})\/)?(?<_year>\d{4})\))?
|
124
124
|
(:[^\(]+\((?<buldate>\d{2}\.\w{1,4}\.\d{4})\))?
|
125
125
|
(\s(?<corr>(Amd|Cor)\.\s?\d+))?
|
126
|
-
# (\s\(((?<
|
126
|
+
# (\s\(((?<_cormonth>\d{2})\/)?(?<_coryear>\d{4})\))?
|
127
127
|
}x =~ text.squeeze(" ")
|
128
128
|
corr&.sub! /\.\s?/, " "
|
129
|
-
id = [code.sub(/[[:space:]]$/, ""), corr].compact.join "
|
129
|
+
id = [code.sub(/[[:space:]]$/, ""), corr].compact.join " "
|
130
130
|
id += " - #{buldate}" if buldate
|
131
131
|
type = id.match(%r{^\w+}).to_s
|
132
132
|
type = "ITU" if type == "G"
|
@@ -134,7 +134,7 @@ module RelatonItu
|
|
134
134
|
end
|
135
135
|
|
136
136
|
# Fetch status.
|
137
|
-
# @param doc [
|
137
|
+
# @param doc [Mechanize::Page]
|
138
138
|
# @return [RelatonBib::DocumentStatus, NilClass]
|
139
139
|
def fetch_status(doc)
|
140
140
|
s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]",
|
@@ -147,7 +147,7 @@ module RelatonItu
|
|
147
147
|
|
148
148
|
# Fetch workgroup.
|
149
149
|
# @param code [String]
|
150
|
-
# @param doc [
|
150
|
+
# @param doc [Mechanize::Page]
|
151
151
|
# @return [RelatonItu::EditorialGroup, NilClass]
|
152
152
|
def fetch_workgroup(code, doc)
|
153
153
|
wg = doc.at('//table/tr/td/span[contains(@id, "Label8")]/a')
|
@@ -155,8 +155,7 @@ module RelatonItu
|
|
155
155
|
|
156
156
|
group = wg && itugroup(wg.text)
|
157
157
|
EditorialGroup.new(
|
158
|
-
bureau: code.match(/(?<=-)./).to_s,
|
159
|
-
group: group
|
158
|
+
bureau: code.match(/(?<=-)./).to_s, group: group
|
160
159
|
)
|
161
160
|
end
|
162
161
|
|
@@ -176,24 +175,24 @@ module RelatonItu
|
|
176
175
|
ItuGroup.new name: name, type: type, acronym: acronym
|
177
176
|
end
|
178
177
|
|
179
|
-
# rubocop:disable Metrics/MethodLength
|
180
|
-
|
181
178
|
# Fetch relations.
|
182
|
-
# @param doc [
|
179
|
+
# @param doc [Mechanize::Page]
|
183
180
|
# @return [Array<Hash>]
|
184
181
|
def fetch_relations(doc)
|
185
|
-
doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]')
|
182
|
+
doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]')
|
183
|
+
.map do |r|
|
186
184
|
ref = r.at('./td/span[contains(@id, "title_e")]/nobr/a')
|
187
|
-
fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en",
|
188
|
-
|
185
|
+
fref = RelatonBib::FormattedRef.new(content: ref.text, language: "en",
|
186
|
+
script: "Latn")
|
187
|
+
bibitem = ItuBibliographicItem.new(formattedref: fref,
|
188
|
+
type: "standard")
|
189
189
|
{ type: "complements", bibitem: bibitem }
|
190
190
|
end
|
191
191
|
end
|
192
|
-
# rubocop:enable Metrics/MethodLength
|
193
192
|
|
194
193
|
# Fetch titles.
|
195
|
-
# @param doc [
|
196
|
-
# @return [
|
194
|
+
# @param doc [Mechanize::Page]
|
195
|
+
# @return [RelatonBib::TypedTitleStringCollection]
|
197
196
|
def fetch_titles(doc)
|
198
197
|
t = doc.at("//td[@class='title']|//div/table[1]/tr[4]/td/strong")
|
199
198
|
return [] unless t
|
@@ -202,27 +201,29 @@ module RelatonItu
|
|
202
201
|
end
|
203
202
|
|
204
203
|
# Fetch dates
|
205
|
-
# @param doc [
|
204
|
+
# @param doc [Mechanize::Page]
|
206
205
|
# @return [Array<Hash>]
|
207
|
-
def fetch_dates(doc) # rubocop:disable Metrics/CyclomaticComplexity
|
206
|
+
def fetch_dates(doc) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
208
207
|
dates = []
|
209
208
|
date = doc.at("//table/tr/td/span[contains(@id, 'Label5')]",
|
210
209
|
"//p[contains(.,'Approved in')]")
|
211
210
|
pdate = date&.text&.match(/\d{4}-\d{2}-\d{2}/).to_s || ob_date(doc)
|
212
211
|
if pdate && !pdate&.empty?
|
213
212
|
dates << { type: "published", on: pdate }
|
213
|
+
elsif pdate = ob_date(doc)
|
214
|
+
dates << { type: "published", on: pdate }
|
214
215
|
end
|
215
216
|
dates
|
216
217
|
end
|
217
218
|
|
218
219
|
# Scrape Operational Bulletin date.
|
219
|
-
# @param doc [
|
220
|
+
# @param doc [Mechanize::Page]
|
220
221
|
# @return [String]
|
221
222
|
def ob_date(doc)
|
222
223
|
pdate = doc.at('//table/tbody/tr/td[contains(text(), "Year:")]')
|
223
224
|
return unless pdate
|
224
225
|
|
225
|
-
roman_to_arabic pdate.text.match(%r{(?<=Year: )\d{2}.\w
|
226
|
+
roman_to_arabic pdate.text.match(%r{(?<=Year: )(\d{2}.\w+.)?\d{4}}).to_s
|
226
227
|
end
|
227
228
|
|
228
229
|
# Convert roman month number in string date to arabic number
|
@@ -230,12 +231,15 @@ module RelatonItu
|
|
230
231
|
# @return [String]
|
231
232
|
def roman_to_arabic(date)
|
232
233
|
%r{(?<rmonth>[IVX]+)} =~ date
|
233
|
-
|
234
|
-
|
234
|
+
if ROMAN_MONTHS.index(rmonth)
|
235
|
+
month = ROMAN_MONTHS.index(rmonth) + 1
|
236
|
+
Date.parse(date.sub(%r{[IVX]+}, month.to_s)).to_s
|
237
|
+
else date
|
238
|
+
end
|
235
239
|
end
|
236
240
|
|
237
241
|
# Fetch contributors
|
238
|
-
# @param doc [
|
242
|
+
# @param code [String]
|
239
243
|
# @return [Array<Hash>]
|
240
244
|
def fetch_contributors(code)
|
241
245
|
return [] unless code
|
@@ -251,11 +255,10 @@ module RelatonItu
|
|
251
255
|
end
|
252
256
|
|
253
257
|
# Fetch links.
|
254
|
-
# @param doc [
|
255
|
-
# @param url [String]
|
258
|
+
# @param doc [Mechanize::Page]
|
256
259
|
# @return [Array<Hash>]
|
257
|
-
def fetch_link(doc
|
258
|
-
links = [{ type: "src", content:
|
260
|
+
def fetch_link(doc)
|
261
|
+
links = [{ type: "src", content: doc.uri.to_s }]
|
259
262
|
obp_elm = doc.at(
|
260
263
|
'//a[@title="Persistent link to download the PDF file"]',
|
261
264
|
"//font[contains(.,'PDF')]/../.."
|
@@ -266,6 +269,8 @@ module RelatonItu
|
|
266
269
|
links
|
267
270
|
end
|
268
271
|
|
272
|
+
# @param type [String]
|
273
|
+
# @param elm [Nokogiri::XML::Element]
|
269
274
|
def typed_link(type, elm)
|
270
275
|
{
|
271
276
|
type: type,
|
@@ -275,7 +280,7 @@ module RelatonItu
|
|
275
280
|
|
276
281
|
# Fetch copyright.
|
277
282
|
# @param code [String]
|
278
|
-
# @param doc [
|
283
|
+
# @param doc [Mechanize::Page]
|
279
284
|
# @return [Array<Hash>]
|
280
285
|
def fetch_copyright(code, doc)
|
281
286
|
abbreviation = code.match(/^[^-]+/).to_s
|
data/lib/relaton_itu/version.rb
CHANGED
data/relaton-itu.gemspec
CHANGED
@@ -26,16 +26,17 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.require_paths = ["lib"]
|
27
27
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
|
28
28
|
|
29
|
-
spec.add_development_dependency "debase"
|
29
|
+
# spec.add_development_dependency "debase"
|
30
30
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
31
31
|
spec.add_development_dependency "pry-byebug"
|
32
|
-
spec.add_development_dependency "rake", "~>
|
32
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
33
33
|
spec.add_development_dependency "rspec", "~> 3.0"
|
34
|
-
spec.add_development_dependency "ruby-debug-ide"
|
34
|
+
# spec.add_development_dependency "ruby-debug-ide"
|
35
35
|
spec.add_development_dependency "ruby-jing"
|
36
36
|
spec.add_development_dependency "simplecov"
|
37
37
|
spec.add_development_dependency "vcr", "~> 5.0.0"
|
38
38
|
spec.add_development_dependency "webmock"
|
39
39
|
|
40
|
+
spec.add_dependency "mechanize"
|
40
41
|
spec.add_dependency "relaton-bib", "~> 1.7.0"
|
41
42
|
end
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-itu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.
|
4
|
+
version: 1.7.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-04-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: debase
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: equivalent-xml
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,14 +44,14 @@ dependencies:
|
|
58
44
|
requirements:
|
59
45
|
- - "~>"
|
60
46
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
47
|
+
version: '13.0'
|
62
48
|
type: :development
|
63
49
|
prerelease: false
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
65
51
|
requirements:
|
66
52
|
- - "~>"
|
67
53
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
54
|
+
version: '13.0'
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
56
|
name: rspec
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,20 +66,6 @@ dependencies:
|
|
80
66
|
- - "~>"
|
81
67
|
- !ruby/object:Gem::Version
|
82
68
|
version: '3.0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: ruby-debug-ide
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
69
|
- !ruby/object:Gem::Dependency
|
98
70
|
name: ruby-jing
|
99
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,6 +122,20 @@ dependencies:
|
|
150
122
|
- - ">="
|
151
123
|
- !ruby/object:Gem::Version
|
152
124
|
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: mechanize
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
153
139
|
- !ruby/object:Gem::Dependency
|
154
140
|
name: relaton-bib
|
155
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -172,9 +158,7 @@ executables: []
|
|
172
158
|
extensions: []
|
173
159
|
extra_rdoc_files: []
|
174
160
|
files:
|
175
|
-
- ".github/workflows/
|
176
|
-
- ".github/workflows/ubuntu.yml"
|
177
|
-
- ".github/workflows/windows.yml"
|
161
|
+
- ".github/workflows/rake.yml"
|
178
162
|
- ".gitignore"
|
179
163
|
- ".rspec"
|
180
164
|
- ".rubocop.yml"
|
@@ -183,6 +167,7 @@ files:
|
|
183
167
|
- README.adoc
|
184
168
|
- Rakefile
|
185
169
|
- bin/console
|
170
|
+
- bin/rspec
|
186
171
|
- bin/setup
|
187
172
|
- grammars/basicdoc.rng
|
188
173
|
- grammars/biblio.rng
|
data/.github/workflows/macos.yml
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
-
# See https://github.com/metanorma/cimas
|
3
|
-
name: macos
|
4
|
-
|
5
|
-
on:
|
6
|
-
push:
|
7
|
-
branches: [ master ]
|
8
|
-
pull_request:
|
9
|
-
branches: [ '**' ]
|
10
|
-
|
11
|
-
jobs:
|
12
|
-
test-macos:
|
13
|
-
name: Test on Ruby ${{ matrix.ruby }} macOS
|
14
|
-
runs-on: macos-latest
|
15
|
-
strategy:
|
16
|
-
fail-fast: false
|
17
|
-
matrix:
|
18
|
-
ruby: [ '2.6', '2.5', '2.4' ]
|
19
|
-
steps:
|
20
|
-
- uses: actions/checkout@master
|
21
|
-
- name: Use Ruby
|
22
|
-
uses: actions/setup-ruby@v1
|
23
|
-
with:
|
24
|
-
ruby-version: ${{ matrix.ruby }}
|
25
|
-
architecture: 'x64'
|
26
|
-
- name: Update gems
|
27
|
-
run: |
|
28
|
-
sudo gem install bundler --force
|
29
|
-
ruby -v | grep 2.5 && bundle config set build.debase --with-cflags="-Wno-error=implicit-function-declaration"
|
30
|
-
ruby -v | grep 2.5 && bundle config set build.ruby-debug-ide --with-cflags="-Wno-error=implicit-function-declaration"
|
31
|
-
bundle install --jobs 4 --retry 3
|
32
|
-
- name: Run specs
|
33
|
-
run: |
|
34
|
-
bundle exec rake
|
@@ -1,33 +0,0 @@
|
|
1
|
-
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
-
# See https://github.com/metanorma/cimas
|
3
|
-
name: ubuntu
|
4
|
-
|
5
|
-
on:
|
6
|
-
push:
|
7
|
-
branches: [ master ]
|
8
|
-
pull_request:
|
9
|
-
branches: [ '**' ]
|
10
|
-
|
11
|
-
jobs:
|
12
|
-
test-linux:
|
13
|
-
name: Test on Ruby ${{ matrix.ruby }} Ubuntu
|
14
|
-
runs-on: ubuntu-latest
|
15
|
-
strategy:
|
16
|
-
fail-fast: false
|
17
|
-
matrix:
|
18
|
-
ruby: [ '2.6', '2.5', '2.4' ]
|
19
|
-
steps:
|
20
|
-
- uses: actions/checkout@master
|
21
|
-
- name: Use Ruby
|
22
|
-
uses: actions/setup-ruby@v1
|
23
|
-
with:
|
24
|
-
ruby-version: ${{ matrix.ruby }}
|
25
|
-
architecture: 'x64'
|
26
|
-
- name: Update gems
|
27
|
-
run: |
|
28
|
-
gem install bundler
|
29
|
-
bundle install --jobs 4 --retry 3
|
30
|
-
- name: Run specs
|
31
|
-
run: |
|
32
|
-
unset JAVA_TOOL_OPTIONS
|
33
|
-
bundle exec rake
|
@@ -1,35 +0,0 @@
|
|
1
|
-
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
-
# See https://github.com/metanorma/cimas
|
3
|
-
name: windows
|
4
|
-
|
5
|
-
on:
|
6
|
-
push:
|
7
|
-
branches: [ master ]
|
8
|
-
pull_request:
|
9
|
-
branches: [ '**' ]
|
10
|
-
|
11
|
-
jobs:
|
12
|
-
test-windows:
|
13
|
-
name: Test on Ruby ${{ matrix.ruby }} Windows
|
14
|
-
runs-on: windows-latest
|
15
|
-
strategy:
|
16
|
-
fail-fast: false
|
17
|
-
matrix:
|
18
|
-
ruby: [ '2.6', '2.5', '2.4' ]
|
19
|
-
steps:
|
20
|
-
- uses: actions/checkout@master
|
21
|
-
- name: Use Ruby
|
22
|
-
uses: actions/setup-ruby@v1
|
23
|
-
with:
|
24
|
-
ruby-version: ${{ matrix.ruby }}
|
25
|
-
architecture: 'x64'
|
26
|
-
- name: Update gems
|
27
|
-
shell: pwsh
|
28
|
-
run: |
|
29
|
-
gem install bundler
|
30
|
-
bundle config --local path vendor/bundle
|
31
|
-
bundle update
|
32
|
-
bundle install --jobs 4 --retry 3
|
33
|
-
- name: Run specs
|
34
|
-
run: |
|
35
|
-
bundle exec rake
|