relaton-itu 1.19.1 → 1.19.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/relaton_itu/hit_collection.rb +1 -1
- data/lib/relaton_itu/pubid.rb +43 -4
- data/lib/relaton_itu/scrapper.rb +50 -29
- data/lib/relaton_itu/version.rb +1 -1
- metadata +3 -3
- /data/{relaton-itu.gemspec → relaton_itu.gemspec} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6eb58814c2b6eb0575c22b0e73606a7e54b0011f412e7d797c8be92d150a31f9
|
4
|
+
data.tar.gz: 2cf52ac96e85e7f916c0c7efc452c81514483c52553e02171cc562b1c0557027
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8e869a3131ab07a3601eedb46e5d62a6a4ac92e638c1c5e362003193d24b699bf829ffa2eade9705a01a5073f63e22f311686500c8a21fde958f4c309b72dc35
|
7
|
+
data.tar.gz: 57531fcb780e7ec0950628efae847101c4daef24d68809e53ff3d7934818fe21ab794f027bf2dcc0ad73b50137693274ec960a8cc5368e03f6fe523f7fa436ad
|
data/lib/relaton_itu/pubid.rb
CHANGED
@@ -18,11 +18,15 @@ module RelatonItu
|
|
18
18
|
rule(:month1) { num.repeat(2, 2).as(:month) }
|
19
19
|
rule(:date1) { str(" (") >> (month1 >> str("/")).maybe >> year >> str(")") }
|
20
20
|
rule(:month2) { match["IVX"].repeat(1, 3).as(:month) }
|
21
|
-
rule(:date2) { str(" - ") >> num.repeat(2, 2) >> dot >> month2 >> dot >> year }
|
21
|
+
rule(:date2) { str(" - ") >> num.repeat(2, 2).as(:day) >> dot >> month2 >> dot >> year }
|
22
22
|
rule(:date) { date1 | date2 }
|
23
23
|
rule(:date?) { date.maybe }
|
24
24
|
|
25
|
-
rule(:
|
25
|
+
rule(:amd_month) { num.repeat(2, 2) }
|
26
|
+
rule(:amd_year) { num.repeat(4, 4) }
|
27
|
+
rule(:amd_date) { str(" (") >> (amd_month >> str("/") >> amd_year).as(:amd_date) >> str(")") }
|
28
|
+
rule(:amd_date?) { amd_date.maybe }
|
29
|
+
rule(:amd) { space >> (str("Amd") | str("Amendment")) >> dot? >> space >> num.repeat(1, 2).as(:amd) >> amd_date? }
|
26
30
|
rule(:amd?) { amd.maybe }
|
27
31
|
|
28
32
|
rule(:sup) { space >> str("Suppl") >> dot? >> space >> num.repeat(1, 2).as(:suppl) }
|
@@ -31,11 +35,14 @@ module RelatonItu
|
|
31
35
|
rule(:annex) { space >> str("Annex") >> space >> match["[:alnum:]"].repeat(1, 2).as(:annex) }
|
32
36
|
rule(:annex?) { annex.maybe }
|
33
37
|
|
34
|
-
rule(:
|
38
|
+
rule(:ver) { space >> str("(V") >> num.repeat(1, 2).as(:version) >> str(")") }
|
39
|
+
rule(:ver?) { ver.maybe }
|
40
|
+
|
41
|
+
rule(:itu_pubid) { prefix >> sector >> type? >> code >> sup? >> annex? >> ver? >> date? >> amd? >> any.repeat }
|
35
42
|
root(:itu_pubid)
|
36
43
|
end
|
37
44
|
|
38
|
-
attr_accessor :prefix, :sector, :type, :code, :suppl, :annex, :year, :month, :amd
|
45
|
+
attr_accessor :prefix, :sector, :type, :code, :suppl, :annex, :version, :year, :month, :day, :amd, :amd_date
|
39
46
|
|
40
47
|
#
|
41
48
|
# Create a new ITU publication identifier.
|
@@ -45,20 +52,26 @@ module RelatonItu
|
|
45
52
|
# @param [String, nil] type
|
46
53
|
# @param [String] code
|
47
54
|
# @param [String, nil] suppl number
|
55
|
+
# @param [String, nil] version
|
48
56
|
# @param [String, nil] year
|
49
57
|
# @param [String, nil] month
|
58
|
+
# @param [String, nil] day
|
50
59
|
# @param [String, nil] amd amendment number
|
60
|
+
# @param [String, nil] amd_date amendment date (parsed as "MM/YYYY")
|
51
61
|
#
|
52
62
|
def initialize(prefix:, sector:, code:, **args)
|
53
63
|
@prefix = prefix
|
54
64
|
@sector = sector
|
55
65
|
@type = args[:type]
|
66
|
+
@day = args[:day]
|
56
67
|
@code, year, month = date_from_code code
|
57
68
|
@suppl = args[:suppl]
|
58
69
|
@annex = args[:annex]
|
70
|
+
@version = args[:version]
|
59
71
|
@year = args[:year] || year
|
60
72
|
@month = roman_to_2digit args[:month] || month
|
61
73
|
@amd = args[:amd]
|
74
|
+
@amd_date = args[:amd_date]
|
62
75
|
end
|
63
76
|
|
64
77
|
def self.parse(id)
|
@@ -75,9 +88,12 @@ module RelatonItu
|
|
75
88
|
hash[:type] = type if type && with_type
|
76
89
|
hash[:suppl] = suppl if suppl
|
77
90
|
hash[:annex] = annex if annex
|
91
|
+
hash[:version] = version if version
|
78
92
|
hash[:year] = year if year
|
79
93
|
hash[:month] = month if month
|
94
|
+
hash[:day] = day if day
|
80
95
|
hash[:amd] = amd if amd
|
96
|
+
hash[:amd_date] = amd_date if amd_date
|
81
97
|
hash
|
82
98
|
end
|
83
99
|
|
@@ -91,18 +107,26 @@ module RelatonItu
|
|
91
107
|
s << " #{code}"
|
92
108
|
s << " Suppl. #{suppl}" if suppl
|
93
109
|
s << " Annex #{annex}" if annex
|
110
|
+
s << " (V#{version})" if version
|
94
111
|
s << date_to_s
|
95
112
|
s << " Amd #{amd}" if amd
|
113
|
+
s << " (#{amd_date})" if amd_date
|
96
114
|
s
|
97
115
|
end
|
98
116
|
|
99
117
|
def ===(other, ignore_args = [])
|
100
118
|
hash = to_h with_type: false
|
101
119
|
other_hash = other.to_h with_type: false
|
120
|
+
hash.delete(:version) if ignore_args.include?(:version)
|
121
|
+
other_hash.delete(:version) unless hash[:version]
|
122
|
+
hash.delete(:day)
|
123
|
+
other_hash.delete(:day)
|
102
124
|
hash.delete(:month)
|
103
125
|
other_hash.delete(:month)
|
104
126
|
hash.delete(:year) if ignore_args.include?(:year)
|
105
127
|
other_hash.delete(:year) unless hash[:year]
|
128
|
+
hash.delete(:amd_date) if ignore_args.include?(:amd_date)
|
129
|
+
other_hash.delete(:amd_date) unless hash[:amd_date]
|
106
130
|
hash == other_hash
|
107
131
|
end
|
108
132
|
|
@@ -133,7 +157,22 @@ module RelatonItu
|
|
133
157
|
end.to_s.rjust(2, "0")
|
134
158
|
end
|
135
159
|
|
160
|
+
def month_to_roman
|
161
|
+
int = month.to_i
|
162
|
+
return month unless int.between? 1, 12
|
163
|
+
|
164
|
+
roman_tens = ["", "X"]
|
165
|
+
roman_units = ["", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX"]
|
166
|
+
|
167
|
+
tens = int / 10
|
168
|
+
units = int % 10
|
169
|
+
|
170
|
+
roman_tens[tens] + roman_units[units]
|
171
|
+
end
|
172
|
+
|
136
173
|
def date_to_s
|
174
|
+
# if code.match?(/^OB\./) && day && month
|
175
|
+
# " - #{day}.#{month_to_roman}.#{year}"
|
137
176
|
if month && year then " (#{month}/#{year})"
|
138
177
|
elsif year then " (#{year})"
|
139
178
|
else ""
|
data/lib/relaton_itu/scrapper.rb
CHANGED
@@ -39,12 +39,14 @@ module RelatonItu
|
|
39
39
|
end
|
40
40
|
|
41
41
|
# Fetch edition.
|
42
|
-
edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b")&.text
|
42
|
+
edition = doc.at("//table/tr/td[contains(@style,'color: white')]/span[contains(@id, 'Label8')]/b")&.text
|
43
|
+
docid = fetch_docid(doc, hit)
|
43
44
|
|
44
45
|
ItuBibliographicItem.new(
|
46
|
+
id: fetch_id(docid),
|
45
47
|
fetched: Date.today.to_s,
|
46
48
|
type: "standard",
|
47
|
-
docid:
|
49
|
+
docid: docid,
|
48
50
|
edition: edition,
|
49
51
|
language: ["en"],
|
50
52
|
script: ["Latn"],
|
@@ -65,20 +67,24 @@ module RelatonItu
|
|
65
67
|
|
66
68
|
private
|
67
69
|
|
70
|
+
def fetch_id(docid)
|
71
|
+
docid.find(&:primary).id.gsub(/[.\s()\/-]/, "")
|
72
|
+
end
|
73
|
+
|
68
74
|
# Fetch abstracts.
|
69
75
|
# @param doc [Mechanize::Page]
|
70
76
|
# @param hit [RelatonItu::Hit]
|
71
77
|
# @return [Array<Hash>]
|
72
78
|
def fetch_abstract(doc, hit) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
73
|
-
abstract_url = doc.at '//table/tr
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
79
|
+
abstract_url = doc.at '//table/tr/td[contains(@style,"color: white")]/span[contains(@id, "lbl_dms")]/div'
|
80
|
+
if abstract_url
|
81
|
+
url = abstract_url[:onclick].match(/https?[^']+/).to_s
|
82
|
+
rsp = hit.hit_collection.agent.get url
|
83
|
+
d = Nokogiri::HTML rsp.body.encode(undef: :replace, replace: "")
|
84
|
+
d.css("p.MsoNormal").text.gsub("\r\n", "").squeeze(" ").gsub("\u00a0", "")
|
85
|
+
elsif a = doc.at('//table/tr/td/span[contains(@class, "observation")]/text()')
|
86
|
+
a.text.strip
|
87
|
+
end => content
|
82
88
|
return [] unless content
|
83
89
|
|
84
90
|
[{
|
@@ -106,33 +112,48 @@ module RelatonItu
|
|
106
112
|
|
107
113
|
# Fetch docid.
|
108
114
|
# @param doc [Mechanize::Page]
|
109
|
-
# @param
|
115
|
+
# @param hit [RelatonItu::Hit]
|
110
116
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
111
|
-
def fetch_docid(doc,
|
112
|
-
docids =
|
117
|
+
def fetch_docid(doc, hit)
|
118
|
+
docids = hit.hit[:code].to_s.split(" | ").map { |c| createdocid(c) }
|
119
|
+
docids += parse_id(doc).map { |c| createdocid c.text } if docids.empty?
|
120
|
+
docids << createdocid(title) unless docids.any?
|
121
|
+
docids
|
122
|
+
end
|
123
|
+
|
124
|
+
def parse_id(doc)
|
125
|
+
doc.xpath(
|
113
126
|
"//span[@id='ctl00_content_main_uc_rec_main_info1_rpt_main_ctl00_lbl_rec']",
|
114
127
|
"//td[.='Identical standard:']/following-sibling::td",
|
115
128
|
"//div/table[1]/tr[4]/td/strong",
|
116
|
-
)
|
117
|
-
docids << createdocid(title) unless docids.any?
|
118
|
-
docids
|
129
|
+
)
|
119
130
|
end
|
120
131
|
|
121
132
|
# @param text [String]
|
122
133
|
# @return [RelatonBib::DocumentIdentifier]
|
123
134
|
def createdocid(text) # rubocop:disable Metrics/MethodLength
|
124
|
-
%r{
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
id
|
134
|
-
|
135
|
-
|
135
|
+
# %r{
|
136
|
+
# ^(?<code>(?:(?:ITU-\w|ISO/IEC)\s)?[^(:]*)
|
137
|
+
# (?:\s\(V(?<version>\d+)\))?
|
138
|
+
# (?:\s\((?:(?<_month>\d{2})/)?(?<_year>\d{4})\))?
|
139
|
+
# (?::[^(]+\((?<buldate>\d{2}\.\w{1,4}\.\d{4})\))?
|
140
|
+
# (?:\s(?<corr>(?:Amd|Cor)\.\s?\d+))?
|
141
|
+
# # (\s\(((?<_cormonth>\d{2})\/)?(?<_coryear>\d{4})\))?
|
142
|
+
# }x =~ text.squeeze(" ")
|
143
|
+
# corr&.sub!(/\.\s?/, " ")
|
144
|
+
# id = [code.sub(/[[:space:]]$/, ""), corr].compact.join " "
|
145
|
+
# id += " (V#{version})" if version
|
146
|
+
# id += " - #{buldate}" if buldate
|
147
|
+
# type = id.match(%r{^\w+}).to_s
|
148
|
+
# type = "ITU" if type == "G"
|
149
|
+
if text.match?(/^(?:ISO|ETSI)/)
|
150
|
+
type = "ISO"
|
151
|
+
text.match(/[^(]+/).to_s.strip.squeeze(" ")
|
152
|
+
else
|
153
|
+
pubid = Pubid.parse(text)
|
154
|
+
type = pubid.prefix # == "G" ? "ITU" : pubid.prefix
|
155
|
+
pubid.to_s
|
156
|
+
end => id
|
136
157
|
RelatonBib::DocumentIdentifier.new(type: type, id: id, primary: true)
|
137
158
|
end
|
138
159
|
|
data/lib/relaton_itu/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-itu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.19.1
|
4
|
+
version: 1.19.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-10-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -109,7 +109,7 @@ files:
|
|
109
109
|
- lib/relaton_itu/util.rb
|
110
110
|
- lib/relaton_itu/version.rb
|
111
111
|
- lib/relaton_itu/xml_parser.rb
|
112
|
-
-
|
112
|
+
- relaton_itu.gemspec
|
113
113
|
homepage: https://github.com/metanorma/relaton-itu
|
114
114
|
licenses:
|
115
115
|
- MIT
|
File without changes
|