relaton-itu 1.7.1 → 1.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +10 -0
- data/lib/relaton_itu/itu_bibliography.rb +27 -11
- data/lib/relaton_itu/scrapper.rb +24 -10
- data/lib/relaton_itu/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 994e4bcf33070a5581017d2572048c0d0c85cebc9818d44b4cb679051c3edb55
|
|
4
|
+
data.tar.gz: 367230b05352067540dce9b8dbbc8e977d31358aca4f39aa96ac529742e553e6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 679daec50ec3f9b60e1cefafff737ed644cc659fedb805550cc56c84670b3c3cb30fa327627c5728e4aa95e4b158fa7ede13612df4ef87d99c026722c2b2a5cb
|
|
7
|
+
data.tar.gz: '08f71b448a574f3088d635ef274f0316faddfec4d3097337144f46f3ec1918e61c056402bc26c660227376c9dc680fc11d3bb5903aadc433db796a4ff88a7098'
|
data/README.adoc
CHANGED
|
@@ -87,6 +87,16 @@ RelatonItu::ItuBibliography.get("ITU-T L.163", "2018", {})
|
|
|
87
87
|
...
|
|
88
88
|
----
|
|
89
89
|
|
|
90
|
+
=== Get amendment
|
|
91
|
+
[source,ruby]
|
|
92
|
+
----
|
|
93
|
+
RelatonItu::ItuBibliography.get "ITU-T G.989.2/Amd 1"
|
|
94
|
+
[relaton-itu] ("ITU-T G.989.2/Amd 1") fetching...
|
|
95
|
+
[relaton-itu] ("ITU-T G.989.2/Amd 1") found ITU-T G.989.2/Amd 1
|
|
96
|
+
=> #<RelatonItu::ItuBibliographicItem:0x007fe5409e5840
|
|
97
|
+
...
|
|
98
|
+
----
|
|
99
|
+
|
|
90
100
|
=== Create bibliographic item from YAML
|
|
91
101
|
[source,ruby]
|
|
92
102
|
----
|
|
data/lib/relaton_itu/itu_bibliography.rb
CHANGED
|
@@ -67,14 +67,35 @@ module RelatonItu
|
|
|
67
67
|
nil
|
|
68
68
|
end
|
|
69
69
|
|
|
70
|
-
def search_filter(code)
|
|
71
|
-
|
|
72
|
-
|
|
70
|
+
def search_filter(code, year)
|
|
71
|
+
%r{
|
|
72
|
+
^(?<pref1>ITU)?(-(?<type1>\w))?\s?(?<code1>[^\s\/]+)
|
|
73
|
+
(\s\(((?<month1>\d{2})\/)?(?<year1>\d{4})\))?
|
|
74
|
+
(\s-\s(?<buldate1>\d{2}\.\w{1,4}\.\d{4}))?
|
|
75
|
+
(\/(?<corr1>(Amd|Cor)\s?\d+))?
|
|
76
|
+
(\s\(((?<cormonth1>\d{2})\/)?(?<coryear1>\d{4})\))?
|
|
77
|
+
}x =~ code
|
|
78
|
+
year ||= year1
|
|
79
|
+
# docidrx = %r{\w+\.\d+|\w\sSuppl\.\s\d+} # %r{^ITU-T\s[^\s]+}
|
|
80
|
+
# c = code.sub(/Imp\s?/, "").match(docidrx).to_s
|
|
73
81
|
warn "[relaton-itu] (\"#{code}\") fetching..."
|
|
74
82
|
result = search(code)
|
|
83
|
+
code1.sub! /(?<=\.)Imp(?=\d)/, "" if result.gi_imp
|
|
75
84
|
result.select do |i|
|
|
76
|
-
|
|
77
|
-
|
|
85
|
+
%r{
|
|
86
|
+
^(?<pref2>ITU)?(-(?<type2>\w))?\s?(?<code2>[\S]+)
|
|
87
|
+
(\s\(((?<month2>\d{2})\/)?(?<year2>\d{4})\))?
|
|
88
|
+
(\s(?<corr2>(Amd|Cor)\.\s?\d+))?
|
|
89
|
+
(\s\(((?<cormonth2>\d{2})\/)?(?<coryear2>\d{4})\))?
|
|
90
|
+
}x =~ i.hit[:code]
|
|
91
|
+
/:[^\(]+\((?<buldate2>\d{2}\.\w{1,4}\.\d{4})\)/ =~ i.hit[:title]
|
|
92
|
+
corr2&.sub! /\.\s?/, " "
|
|
93
|
+
pref1 == pref2 && (!type1 || type1 == type2) && code1 == code2 &&
|
|
94
|
+
(!year || year == year2) && (!month1 || month1 == month2) &&
|
|
95
|
+
corr1 == corr2 && (!coryear1 || coryear1 == coryear2) &&
|
|
96
|
+
buldate1 == buldate2 && (!cormonth1 || cormonth1 == cormonth2)
|
|
97
|
+
# i.hit[:code] &&
|
|
98
|
+
# i.hit[:code].match(docidrx).to_s == c
|
|
78
99
|
end
|
|
79
100
|
end
|
|
80
101
|
|
|
@@ -86,24 +107,19 @@ module RelatonItu
|
|
|
86
107
|
# If no match, returns any years which caused mismatch, for error reporting
|
|
87
108
|
def isobib_results_filter(result, year)
|
|
88
109
|
missed_years = []
|
|
89
|
-
# result.each_slice(3) do |s| # ISO website only allows 3 connections
|
|
90
|
-
# fetch_pages(s, 3).each do |r|
|
|
91
110
|
result.each do |r|
|
|
92
111
|
return { ret: r.fetch } if !year
|
|
93
112
|
|
|
94
113
|
/\(\d{2}\/(?<pyear>\d{4})\)/ =~ r.hit[:code]
|
|
95
|
-
# r.date.select { |d| d.type == "published" }.each do |d|
|
|
96
114
|
return { ret: r.fetch } if year == pyear
|
|
97
115
|
|
|
98
116
|
missed_years << pyear
|
|
99
|
-
# end
|
|
100
|
-
# end
|
|
101
117
|
end
|
|
102
118
|
{ years: missed_years }
|
|
103
119
|
end
|
|
104
120
|
|
|
105
121
|
def itubib_get1(code, year, _opts)
|
|
106
|
-
result = search_filter(code) || return
|
|
122
|
+
result = search_filter(code, year) || return
|
|
107
123
|
ret = isobib_results_filter(result, year)
|
|
108
124
|
if ret[:ret]
|
|
109
125
|
warn "[relaton-itu] (\"#{code}\") found #{ret[:ret].docidentifier.first&.id}"
|
data/lib/relaton_itu/scrapper.rb
CHANGED
|
@@ -44,7 +44,7 @@ module RelatonItu
|
|
|
44
44
|
ItuBibliographicItem.new(
|
|
45
45
|
fetched: Date.today.to_s,
|
|
46
46
|
type: "standard",
|
|
47
|
-
docid: fetch_docid(doc),
|
|
47
|
+
docid: fetch_docid(doc, hit_data[:title]),
|
|
48
48
|
edition: edition,
|
|
49
49
|
language: ["en"],
|
|
50
50
|
script: ["Latn"],
|
|
@@ -105,18 +105,32 @@ module RelatonItu
|
|
|
105
105
|
|
|
106
106
|
# Fetch docid.
|
|
107
107
|
# @param doc [Nokogiri::HTML::Document]
|
|
108
|
+
# @param title [String]
|
|
108
109
|
# @return [Hash]
|
|
109
|
-
def fetch_docid(doc)
|
|
110
|
-
doc.xpath(
|
|
110
|
+
def fetch_docid(doc, title)
|
|
111
|
+
docids = doc.xpath(
|
|
111
112
|
"//span[@id='ctl00_content_main_uc_rec_main_info1_rpt_main_ctl00_lbl_rec']",
|
|
112
113
|
"//td[.='Identical standard:']/following-sibling::td",
|
|
113
|
-
"//div/table[1]/tr[4]/td/strong"
|
|
114
|
-
).map
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
114
|
+
"//div/table[1]/tr[4]/td/strong"
|
|
115
|
+
).map { |c| createdocid c.text }
|
|
116
|
+
docids << createdocid(title) unless docids.any?
|
|
117
|
+
docids
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
def createdocid(text)
|
|
121
|
+
%r{
|
|
122
|
+
^(?<code>((ITU-\w|ISO\/IEC)\s)?[^\(:]+)
|
|
123
|
+
(\(((?<month>\d{2})\/)?(?<year>\d{4})\))?
|
|
124
|
+
(:[^\(]+\((?<buldate>\d{2}\.\w{1,4}\.\d{4})\))?
|
|
125
|
+
(\s(?<corr>(Amd|Cor)\.\s?\d+))?
|
|
126
|
+
# (\s\(((?<cormonth>\d{2})\/)?(?<coryear>\d{4})\))?
|
|
127
|
+
}x =~ text.squeeze(" ")
|
|
128
|
+
corr&.sub! /\.\s?/, " "
|
|
129
|
+
id = [code.sub(/[[:space:]]$/, ""), corr].compact.join "/"
|
|
130
|
+
id += " - #{buldate}" if buldate
|
|
131
|
+
type = id.match(%r{^\w+}).to_s
|
|
132
|
+
type = "ITU" if type == "G"
|
|
133
|
+
RelatonBib::DocumentIdentifier.new(type: type, id: id)
|
|
120
134
|
end
|
|
121
135
|
|
|
122
136
|
# Fetch status.
|
data/lib/relaton_itu/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-itu
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.7.1
|
|
4
|
+
version: 1.7.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-12-
|
|
11
|
+
date: 2020-12-08 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: debase
|