relaton-itu 1.5.0 → 1.7.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/macos.yml +2 -0
- data/README.adoc +10 -0
- data/grammars/isodoc.rng +14 -43
- data/grammars/itu.rng +1 -0
- data/lib/relaton_itu/hit_collection.rb +5 -6
- data/lib/relaton_itu/itu_bibliography.rb +27 -11
- data/lib/relaton_itu/scrapper.rb +24 -10
- data/lib/relaton_itu/version.rb +1 -1
- data/relaton-itu.gemspec +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 994e4bcf33070a5581017d2572048c0d0c85cebc9818d44b4cb679051c3edb55
|
4
|
+
data.tar.gz: 367230b05352067540dce9b8dbbc8e977d31358aca4f39aa96ac529742e553e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 679daec50ec3f9b60e1cefafff737ed644cc659fedb805550cc56c84670b3c3cb30fa327627c5728e4aa95e4b158fa7ede13612df4ef87d99c026722c2b2a5cb
|
7
|
+
data.tar.gz: '08f71b448a574f3088d635ef274f0316faddfec4d3097337144f46f3ec1918e61c056402bc26c660227376c9dc680fc11d3bb5903aadc433db796a4ff88a7098'
|
data/.github/workflows/macos.yml
CHANGED
@@ -26,6 +26,8 @@ jobs:
|
|
26
26
|
- name: Update gems
|
27
27
|
run: |
|
28
28
|
sudo gem install bundler --force
|
29
|
+
ruby -v | grep 2.5 && bundle config set build.debase --with-cflags="-Wno-error=implicit-function-declaration"
|
30
|
+
ruby -v | grep 2.5 && bundle config set build.ruby-debug-ide --with-cflags="-Wno-error=implicit-function-declaration"
|
29
31
|
bundle install --jobs 4 --retry 3
|
30
32
|
- name: Run specs
|
31
33
|
run: |
|
data/README.adoc
CHANGED
@@ -87,6 +87,16 @@ RelatonItu::ItuBibliography.get("ITU-T L.163", "2018", {})
|
|
87
87
|
...
|
88
88
|
----
|
89
89
|
|
90
|
+
=== Get amendment
|
91
|
+
[source,ruby]
|
92
|
+
----
|
93
|
+
RelatonItu::ItuBibliography.get "ITU-T G.989.2/Amd 1"
|
94
|
+
[relaton-itu] ("ITU-T G.989.2/Amd 1") fetching...
|
95
|
+
[relaton-itu] ("ITU-T G.989.2/Amd 1") found ITU-T G.989.2/Amd 1
|
96
|
+
=> #<RelatonItu::ItuBibliographicItem:0x007fe5409e5840
|
97
|
+
...
|
98
|
+
----
|
99
|
+
|
90
100
|
=== Create bibliographic item from YAML
|
91
101
|
[source,ruby]
|
92
102
|
----
|
data/grammars/isodoc.rng
CHANGED
@@ -24,6 +24,14 @@
|
|
24
24
|
<start>
|
25
25
|
<ref name="standard-document"/>
|
26
26
|
</start>
|
27
|
+
<define name="doctype">
|
28
|
+
<element name="doctype">
|
29
|
+
<optional>
|
30
|
+
<attribute name="abbreviation"/>
|
31
|
+
</optional>
|
32
|
+
<ref name="DocumentType"/>
|
33
|
+
</element>
|
34
|
+
</define>
|
27
35
|
<define name="hyperlink">
|
28
36
|
<element name="link">
|
29
37
|
<attribute name="target">
|
@@ -141,6 +149,11 @@
|
|
141
149
|
<data type="boolean"/>
|
142
150
|
</attribute>
|
143
151
|
</optional>
|
152
|
+
<optional>
|
153
|
+
<attribute name="key">
|
154
|
+
<data type="boolean"/>
|
155
|
+
</attribute>
|
156
|
+
</optional>
|
144
157
|
<oneOrMore>
|
145
158
|
<ref name="dt"/>
|
146
159
|
<ref name="dd"/>
|
@@ -1164,49 +1177,7 @@
|
|
1164
1177
|
</define>
|
1165
1178
|
<define name="annex">
|
1166
1179
|
<element name="annex">
|
1167
|
-
<optional>
|
1168
|
-
<attribute name="id">
|
1169
|
-
<data type="ID"/>
|
1170
|
-
</attribute>
|
1171
|
-
</optional>
|
1172
|
-
<optional>
|
1173
|
-
<attribute name="language"/>
|
1174
|
-
</optional>
|
1175
|
-
<optional>
|
1176
|
-
<attribute name="script"/>
|
1177
|
-
</optional>
|
1178
|
-
<optional>
|
1179
|
-
<attribute name="inline-header">
|
1180
|
-
<data type="boolean"/>
|
1181
|
-
</attribute>
|
1182
|
-
</optional>
|
1183
|
-
<attribute name="obligation">
|
1184
|
-
<choice>
|
1185
|
-
<value>normative</value>
|
1186
|
-
<value>informative</value>
|
1187
|
-
</choice>
|
1188
|
-
</attribute>
|
1189
|
-
<optional>
|
1190
|
-
<ref name="section-title"/>
|
1191
|
-
</optional>
|
1192
|
-
<group>
|
1193
|
-
<group>
|
1194
|
-
<zeroOrMore>
|
1195
|
-
<ref name="BasicBlock"/>
|
1196
|
-
</zeroOrMore>
|
1197
|
-
<zeroOrMore>
|
1198
|
-
<ref name="note"/>
|
1199
|
-
</zeroOrMore>
|
1200
|
-
</group>
|
1201
|
-
<zeroOrMore>
|
1202
|
-
<choice>
|
1203
|
-
<ref name="annex-subsection"/>
|
1204
|
-
<ref name="terms"/>
|
1205
|
-
<ref name="definitions"/>
|
1206
|
-
<ref name="references"/>
|
1207
|
-
</choice>
|
1208
|
-
</zeroOrMore>
|
1209
|
-
</group>
|
1180
|
+
<ref name="Annex-Section"/>
|
1210
1181
|
</element>
|
1211
1182
|
</define>
|
1212
1183
|
<define name="terms">
|
data/grammars/itu.rng
CHANGED
@@ -29,7 +29,7 @@ module RelatonItu
|
|
29
29
|
|
30
30
|
# @return [String]
|
31
31
|
def group
|
32
|
-
@group ||= if %r{
|
32
|
+
@group ||= if %r{OB|Operational Bulletin}.match? text then "Publications"
|
33
33
|
else "Recommendations"
|
34
34
|
end
|
35
35
|
end
|
@@ -54,7 +54,7 @@ module RelatonItu
|
|
54
54
|
"Selected" => false,
|
55
55
|
"Value" => "",
|
56
56
|
"Label" => "Name",
|
57
|
-
"Target" => "
|
57
|
+
"Target" => "\\/name_s",
|
58
58
|
"TypeName" => "CHECKBOX",
|
59
59
|
"GetCriteriaType" => 0,
|
60
60
|
},
|
@@ -62,7 +62,7 @@ module RelatonItu
|
|
62
62
|
"Selected" => false,
|
63
63
|
"Value" => "",
|
64
64
|
"Label" => "Short description",
|
65
|
-
"Target" => "
|
65
|
+
"Target" => "\\/short_description_s",
|
66
66
|
"TypeName" => "CHECKBOX",
|
67
67
|
"GetCriteriaType" => 0,
|
68
68
|
},
|
@@ -70,7 +70,7 @@ module RelatonItu
|
|
70
70
|
"Selected" => false,
|
71
71
|
"Value" => "",
|
72
72
|
"Label" => "File content",
|
73
|
-
"Target" => "
|
73
|
+
"Target" => "\\/file",
|
74
74
|
"TypeName" => "CHECKBOX",
|
75
75
|
"GetCriteriaType" => 0,
|
76
76
|
},
|
@@ -79,9 +79,8 @@ module RelatonItu
|
|
79
79
|
"Selected" => false,
|
80
80
|
}],
|
81
81
|
"Topics" => "",
|
82
|
-
"ClientData" => {
|
82
|
+
"ClientData" => {},
|
83
83
|
"Language" => "en",
|
84
|
-
"IP" => "",
|
85
84
|
"SearchType" => "All",
|
86
85
|
}
|
87
86
|
end
|
@@ -67,14 +67,35 @@ module RelatonItu
|
|
67
67
|
nil
|
68
68
|
end
|
69
69
|
|
70
|
-
def search_filter(code)
|
71
|
-
|
72
|
-
|
70
|
+
def search_filter(code, year)
|
71
|
+
%r{
|
72
|
+
^(?<pref1>ITU)?(-(?<type1>\w))?\s?(?<code1>[^\s\/]+)
|
73
|
+
(\s\(((?<month1>\d{2})\/)?(?<year1>\d{4})\))?
|
74
|
+
(\s-\s(?<buldate1>\d{2}\.\w{1,4}\.\d{4}))?
|
75
|
+
(\/(?<corr1>(Amd|Cor)\s?\d+))?
|
76
|
+
(\s\(((?<cormonth1>\d{2})\/)?(?<coryear1>\d{4})\))?
|
77
|
+
}x =~ code
|
78
|
+
year ||= year1
|
79
|
+
# docidrx = %r{\w+\.\d+|\w\sSuppl\.\s\d+} # %r{^ITU-T\s[^\s]+}
|
80
|
+
# c = code.sub(/Imp\s?/, "").match(docidrx).to_s
|
73
81
|
warn "[relaton-itu] (\"#{code}\") fetching..."
|
74
82
|
result = search(code)
|
83
|
+
code1.sub! /(?<=\.)Imp(?=\d)/, "" if result.gi_imp
|
75
84
|
result.select do |i|
|
76
|
-
|
77
|
-
|
85
|
+
%r{
|
86
|
+
^(?<pref2>ITU)?(-(?<type2>\w))?\s?(?<code2>[\S]+)
|
87
|
+
(\s\(((?<month2>\d{2})\/)?(?<year2>\d{4})\))?
|
88
|
+
(\s(?<corr2>(Amd|Cor)\.\s?\d+))?
|
89
|
+
(\s\(((?<cormonth2>\d{2})\/)?(?<coryear2>\d{4})\))?
|
90
|
+
}x =~ i.hit[:code]
|
91
|
+
/:[^\(]+\((?<buldate2>\d{2}\.\w{1,4}\.\d{4})\)/ =~ i.hit[:title]
|
92
|
+
corr2&.sub! /\.\s?/, " "
|
93
|
+
pref1 == pref2 && (!type1 || type1 == type2) && code1 == code2 &&
|
94
|
+
(!year || year == year2) && (!month1 || month1 == month2) &&
|
95
|
+
corr1 == corr2 && (!coryear1 || coryear1 == coryear2) &&
|
96
|
+
buldate1 == buldate2 && (!cormonth1 || cormonth1 == cormonth2)
|
97
|
+
# i.hit[:code] &&
|
98
|
+
# i.hit[:code].match(docidrx).to_s == c
|
78
99
|
end
|
79
100
|
end
|
80
101
|
|
@@ -86,24 +107,19 @@ module RelatonItu
|
|
86
107
|
# If no match, returns any years which caused mismatch, for error reporting
|
87
108
|
def isobib_results_filter(result, year)
|
88
109
|
missed_years = []
|
89
|
-
# result.each_slice(3) do |s| # ISO website only allows 3 connections
|
90
|
-
# fetch_pages(s, 3).each do |r|
|
91
110
|
result.each do |r|
|
92
111
|
return { ret: r.fetch } if !year
|
93
112
|
|
94
113
|
/\(\d{2}\/(?<pyear>\d{4})\)/ =~ r.hit[:code]
|
95
|
-
# r.date.select { |d| d.type == "published" }.each do |d|
|
96
114
|
return { ret: r.fetch } if year == pyear
|
97
115
|
|
98
116
|
missed_years << pyear
|
99
|
-
# end
|
100
|
-
# end
|
101
117
|
end
|
102
118
|
{ years: missed_years }
|
103
119
|
end
|
104
120
|
|
105
121
|
def itubib_get1(code, year, _opts)
|
106
|
-
result = search_filter(code) || return
|
122
|
+
result = search_filter(code, year) || return
|
107
123
|
ret = isobib_results_filter(result, year)
|
108
124
|
if ret[:ret]
|
109
125
|
warn "[relaton-itu] (\"#{code}\") found #{ret[:ret].docidentifier.first&.id}"
|
data/lib/relaton_itu/scrapper.rb
CHANGED
@@ -44,7 +44,7 @@ module RelatonItu
|
|
44
44
|
ItuBibliographicItem.new(
|
45
45
|
fetched: Date.today.to_s,
|
46
46
|
type: "standard",
|
47
|
-
docid: fetch_docid(doc),
|
47
|
+
docid: fetch_docid(doc, hit_data[:title]),
|
48
48
|
edition: edition,
|
49
49
|
language: ["en"],
|
50
50
|
script: ["Latn"],
|
@@ -105,18 +105,32 @@ module RelatonItu
|
|
105
105
|
|
106
106
|
# Fetch docid.
|
107
107
|
# @param doc [Nokogiri::HTML::Document]
|
108
|
+
# @param title [String]
|
108
109
|
# @return [Hash]
|
109
|
-
def fetch_docid(doc)
|
110
|
-
doc.xpath(
|
110
|
+
def fetch_docid(doc, title)
|
111
|
+
docids = doc.xpath(
|
111
112
|
"//span[@id='ctl00_content_main_uc_rec_main_info1_rpt_main_ctl00_lbl_rec']",
|
112
113
|
"//td[.='Identical standard:']/following-sibling::td",
|
113
|
-
"//div/table[1]/tr[4]/td/strong"
|
114
|
-
).map
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
114
|
+
"//div/table[1]/tr[4]/td/strong"
|
115
|
+
).map { |c| createdocid c.text }
|
116
|
+
docids << createdocid(title) unless docids.any?
|
117
|
+
docids
|
118
|
+
end
|
119
|
+
|
120
|
+
def createdocid(text)
|
121
|
+
%r{
|
122
|
+
^(?<code>((ITU-\w|ISO\/IEC)\s)?[^\(:]+)
|
123
|
+
(\(((?<month>\d{2})\/)?(?<year>\d{4})\))?
|
124
|
+
(:[^\(]+\((?<buldate>\d{2}\.\w{1,4}\.\d{4})\))?
|
125
|
+
(\s(?<corr>(Amd|Cor)\.\s?\d+))?
|
126
|
+
# (\s\(((?<cormonth>\d{2})\/)?(?<coryear>\d{4})\))?
|
127
|
+
}x =~ text.squeeze(" ")
|
128
|
+
corr&.sub! /\.\s?/, " "
|
129
|
+
id = [code.sub(/[[:space:]]$/, ""), corr].compact.join "/"
|
130
|
+
id += " - #{buldate}" if buldate
|
131
|
+
type = id.match(%r{^\w+}).to_s
|
132
|
+
type = "ITU" if type == "G"
|
133
|
+
RelatonBib::DocumentIdentifier.new(type: type, id: id)
|
120
134
|
end
|
121
135
|
|
122
136
|
# Fetch status.
|
data/lib/relaton_itu/version.rb
CHANGED
data/relaton-itu.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-itu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.0
|
4
|
+
version: 1.7.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-12-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: debase
|
@@ -156,14 +156,14 @@ dependencies:
|
|
156
156
|
requirements:
|
157
157
|
- - "~>"
|
158
158
|
- !ruby/object:Gem::Version
|
159
|
-
version: 1.
|
159
|
+
version: 1.7.0
|
160
160
|
type: :runtime
|
161
161
|
prerelease: false
|
162
162
|
version_requirements: !ruby/object:Gem::Requirement
|
163
163
|
requirements:
|
164
164
|
- - "~>"
|
165
165
|
- !ruby/object:Gem::Version
|
166
|
-
version: 1.
|
166
|
+
version: 1.7.0
|
167
167
|
description: 'RelatonItu: retrieve ITU Standards for bibliographic use using the BibliographicItem
|
168
168
|
model'
|
169
169
|
email:
|