iev 0.3.1 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ # (c) Copyright 2020 Ribose Inc.
4
+ #
5
+
6
+ module IEV
7
+ # @todo This needs to be rewritten.
8
# Resolves two-letter ISO 639-1 language codes against the bundled
# +iso_639_2.yaml+ table.
class Iso639Code
  COUNTRY_CODES = YAML.load(IO.read(File.join(__dir__, "iso_639_2.yaml")))
  THREE_CHAR_MEMO = {}

  # Memoized lookup keyed by the pair (two_char_code, code_type).
  #
  # @param two_char_code [String] ISO 639-1 code, possibly followed by a
  #   region (e.g. "nl BE")
  # @param code_type [String] key that must be present in the table entry
  # @return [Array] the +[key, value]+ pair found by {#find}
  # @raise [StandardError] when no entry matches
  def self.three_char_code(two_char_code, code_type = "terminology")
    cache_key = [two_char_code, code_type]
    THREE_CHAR_MEMO[cache_key] ||= new(two_char_code).find(code_type)
  end

  # @param two_char_code [String] a two-letter code, or a longer string
  #   whose first space-separated token is used
  def initialize(two_char_code)
    # Anything that is not exactly two characters long is treated like
    # the "nl BE" entries found in the IEV sheet: keep the first token.
    @code =
      if two_char_code.length == 2
        two_char_code
      else
        two_char_code.split(" ").first
      end
  end

  # Finds the first table entry whose "iso_639_1" equals the stored code
  # and which has a truthy value under +code_type+.
  #
  # @param code_type [String]
  # @return [Array] +[key, value]+ pair as yielded by Hash#detect
  # @raise [StandardError] when nothing matches
  def find(code_type)
    wanted = @code.to_s
    entry = country_codes.detect do |key, value|
      value["iso_639_1"] == wanted && value[code_type] && key
    end

    raise StandardError, "Iso639Code not found for '#{@code}'!" if entry.nil?

    entry
  end

  private

  # Indirection over the constant table.
  def country_codes
    COUNTRY_CODES
  end
end
46
+ end
47
+
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ # (c) Copyright 2020 Ribose Inc.
4
+ #
5
+
6
+ module IEV
7
# Optional profiling wrapper: when the global +$IEV_PROFILE+ is truthy the
# given block is run under RubyProf and Benchmark and reports are written
# into +dir+; otherwise the block is simply called.
class Profiler
  attr_reader :bench, :dir, :prefix, :profile

  # Shortcut for +new(prefix).run(&block)+.
  #
  # @param prefix [String, nil] optional report file name prefix
  # @return [Object] whatever the block returns
  def self.measure(prefix = nil, &block)
    new(prefix).run(&block)
  end

  # @param prefix [String, nil] report file name prefix
  # @param dir [String] directory the reports are written to
  def initialize(prefix, dir: "profile")
    @dir = dir
    @prefix = prefix
  end

  # Runs the block, profiled only when profiling is enabled.
  #
  # @return [Object] the block's return value
  def run(&block)
    return run!(&block) if profiler_enabled?

    block.call
  end

  # Runs the block under RubyProf and Benchmark unconditionally. Reports
  # are printed from +ensure+, i.e. also when the block raises.
  #
  # @return [Object] the block's return value
  def run!(&block)
    retval = nil
    @profile = RubyProf.profile(allow_exceptions: true) do
      @bench = Benchmark.measure { retval = block.call }
    end
    retval
  ensure
    print_reports
  end

  # @return [Object] the current value of the +$IEV_PROFILE+ global
  def profiler_enabled?
    $IEV_PROFILE
  end

  private

  # Creates the output directory and writes every available report.
  def print_reports
    FileUtils.mkdir_p(dir)
    print_benchmark("bench.txt")
    print_profile("flat.txt", RubyProf::FlatPrinter)
    print_profile("graph.html", RubyProf::GraphHtmlPrinter)
    print_profile("calls.html", RubyProf::CallStackPrinter)
  end

  # Writes the benchmark caption and measurement; no-op without a result.
  def print_benchmark(suffix)
    return if bench.nil?

    File.write(report_file_name(suffix), "#{Benchmark::CAPTION}\n#{bench}")
  end

  # Writes one RubyProf report with the given printer class; no-op when
  # no profile was collected.
  def print_profile(suffix, printer)
    return if profile.nil?

    File.open(report_file_name(suffix), "w") { |file| printer.new(profile).print(file) }
  end

  # Absolute path "<dir>/<prefix>-<suffix>" (the prefix part is omitted
  # when the prefix is nil).
  def report_file_name(suffix)
    File.expand_path([prefix, suffix].compact.join("-"), dir)
  end
end
69
+ end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ # (c) Copyright 2020 Ribose Inc.
4
+ #
5
+
6
+ require "singleton"
7
+
8
+ module IEV
9
+ # Relaton cache singleton.
10
# Process-wide wrapper around a single Relaton::Db instance.
class RelatonDb
  include Singleton
  include CLI::UI

  def initialize
    info "Initializing Relaton..."
    @db = Relaton::Db.new "db", nil
  end

  # Fetches a reference through Relaton, retrying on failures and keeping
  # anything Relaton prints away from the real output streams.
  #
  # @param code [String] reference
  # @return [Object] whatever Relaton::Db#fetch returns (the original
  #   annotation names RelatonIso::IsoBibliographicItem -- TODO confirm)
  def fetch(code)
    retrying_on_failures do
      capture_output_streams { @db.fetch code }
    end
  end

  private

  # Yields, and on any StandardError retries with exponential back-off
  # (0.2s, 0.4s, 0.8s, ...).
  #
  # NOTE: +attempts+ counts the retries, not the total calls: with the
  # default of 4 the block may run up to 5 times before the last error is
  # re-raised.
  def retrying_on_failures(attempts: 4)
    retries_done = 0

    begin
      yield
    rescue
      raise if retries_done >= attempts

      retries_done += 1
      sleep(2 ** retries_done * 0.1)
      retry
    end
  end

  # Temporarily points $stdout and $stderr at one in-memory buffer while
  # the block runs. The original streams are always restored, and any
  # captured text is forwarded to +debug+ under the :relaton topic.
  def capture_output_streams
    saved_stdout = $stdout
    saved_stderr = $stderr
    buffer = StringIO.new
    $stdout = buffer
    $stderr = buffer

    begin
      yield
    ensure
      $stdout = saved_stdout
      $stderr = saved_stderr
      debug(:relaton, buffer.string) if buffer.pos > 0
    end
  end
end
63
+ end
@@ -0,0 +1,350 @@
1
+ # frozen_string_literal: true
2
+
3
+ # (c) Copyright 2020 Ribose Inc.
4
+ #
5
+
6
+ # rubocop:todo Style/RedundantRegexpEscape
7
+
8
+ module IEV
9
+ # Parses information from the spreadsheet's SOURCE column.
10
+ #
11
+ # @example
12
+ # SourceParser.new(cell_data_string, term_domain).parsed_sources
13
+ class SourceParser
14
+ include CLI::UI
15
+ include Utilities
16
+ using DataConversions
17
+
18
+ attr_reader :src_split, :parsed_sources, :raw_str, :src_str
19
+
20
# Stores a frozen copy of the raw cell text and a frozen decoded and
# sanitized version of it, then parses the sources immediately.
#
# @param source_str [String] raw content of the SOURCE cell
# @param term_domain [Object] passed on to +parse_anchor_tag+ when
#   sources are extracted
def initialize(source_str, term_domain)
  @term_domain = term_domain
  @raw_str = source_str.dup.freeze
  @src_str = @raw_str.decode_html.sanitize.freeze
  parse
end
26
+
27
+ private
28
+
29
# Splits the sanitized cell text into single references and extracts the
# structured data for each of them.
def parse
  @src_split = split_source_field(src_str)
  @parsed_sources = @src_split.map { |single_ref| extract_single_source(single_ref) }
end
33
+
34
# Splits a SOURCE cell that may list several references into one string
# per reference.
#
# The rewrites below are applied in order with String#gsub; each one turns
# a place where two references meet into the ";;" marker, which is finally
# used as the split point.
#
# @param source [String]
# @return [Array<String>] stripped reference strings
def split_source_field(source)
  rewrites = [
    # IEC 62047-22:2014, 3.1.1, modified – In the definition, ...
    [/;\s?([A-Z][A-Z])/, ';; \1'],
    [/MOD[,\.]/, "MOD;;"],

    # 702-01-02 MOD,ITU-R Rec. 431 MOD
    # 161-06-01 MOD. ITU RR 139 MOD
    [/MOD,\s*([UIC\d])/, 'MOD;; \1'],
    [/MOD[,\.]/, "MOD;;"],

    # 702-09-44 MOD, 723-07-47, voir 723-10-91
    [/MOD,\s*(\d{3})/, 'MOD;; \1'],
    [/,\s*see\s*(\d{3})/, ';;see \1'],
    [/,\s*voir\s*(\d{3})/, ';;voir \1'],

    # IEC 62303:2008, 3.1, modified and IEC 62302:2007, 3.2; IAEA 4
    # CEI 62303:2008, 3.1, modifiée et CEI 62302:2007, 3.2; AIEA 4
    [/modified and ([ISOECUT])/, 'modified;; \1'],
    [/modifiée et ([ISOECUT])/, 'modifiée;; \1'],

    # 725-12-50, ITU RR 11
    [/,\s+ITU/, ";; ITU"],

    # 705-02-01, 702-02-07
    [/(\d{2,3}-\d{2,3}-\d{2,3}),\s*(\d{2,3}-\d{2,3}-\d{2,3})/, '\1;; \2'],
  ]

  marked = rewrites.reduce(source) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end

  marked.split(";;").map(&:strip)
end
70
+
71
# Builds the structured description of one reference.
#
# @param raw_ref [String] a single reference as produced by
#   +split_source_field+
# @return [Hash] with the keys "ref", "clause", "link", "relationship"
#   and "original"; entries whose value is nil are dropped. When Relaton
#   raises RelatonBib::RequestError the message is passed to +warn+ and
#   the return value of +warn+ is returned instead of a hash.
def extract_single_source(raw_ref)
  relation_type = extract_source_relationship(raw_ref)
  normalized = normalize_ref_string(raw_ref)
  source_ref = extract_source_ref(normalized)
  clause = extract_source_clause(normalized)

  # The link lookup is kept ahead of the "original" conversion so both
  # run in the same order as the hash entries below.
  link = obtain_source_link(source_ref)
  original = IEV::Converter.mathml_to_asciimath(
    parse_anchor_tag(raw_ref, @term_domain),
  )

  {
    "ref" => source_ref,
    "clause" => clause,
    "link" => link,
    "relationship" => relation_type,
    "original" => original,
  }.compact
rescue ::RelatonBib::RequestError => e
  warn e.message
end
89
+
90
# Rewrites French/Russian spellings and free-form "definition X of Y"
# phrases into the English forms the later matchers expect.
#
# Inputs seen in the sheet include:
#   définition 3.60 de la 62127-1
#   definition 3.60 of 62127-1
#   definition 3.7 of IEC 62127-1 MOD, adapted from 4.2.9 of IEC 61828 and 3.6 of IEC 61102
#   définition 3.7 de la CEI 62127-1 MOD, adaptées sur la base du 4.2.9 de la CEI 61828 et du 3.6 de la CEI 61102
#   definition 3.54 of 62127-1 MOD
#   définition 3.54 de la CEI 62127-1 MOD
#   IEC 62313:2009, 3.6, modified
#   IEC 62313:2009, 3.6, modifié
#
# @param str [String]
# @return [String] a new, normalized string
def normalize_ref_string(str)
  # Applied to every occurrence, in this order.
  replace_all = [
    [/CEI/, "IEC"],
    [/Guide IEC/, "IEC Guide"],
    [/Guide ISO\/IEC/, "ISO/IEC Guide"],
    [/VEI/, "IEV"],
    [/UIT/, "ITU"],
    [/IUT-R/, "ITU-R"],
    [/UTI-R/, "ITU-R"],
    [/Recomm[ea]ndation ITU-T/, "ITU-T Recommendation"],
    [/ITU-T (\w.\d{3}):(\d{4})/, 'ITU-T Recommendation \1 (\2)'],
    [/ITU-R Rec. (\d+)/, 'ITU-R Recommendation \1'],
    [/[≈≠]\s+/, ""],
  ]

  # Applied to the first occurrence only, after all of the above.
  replace_first = [
    [/ИЗМ\Z/, "MOD"],
    [/definition ([\d\.]+) of ([\d\-\:]+) MOD/, 'IEC \2, \1, modified - '],
    [/definition ([\d\.]+) of IEC ([\d\-\:]+) MOD/, 'IEC \2, \1, modified - '],
    [/définition ([\d\.]+) de la ([\d\-\:]+) MOD/, 'IEC \2, \1, modified - '],
    [/définition ([\d\.]+) de la IEC ([\d\-\:]+) MOD/, 'IEC \2, \1, modified - '],
    [/(\d{3})\ (\d{2})\ (\d{2})/, '\1-\2-\3'], # for 221 04 03
  ]

  # .sub(/\A(from|d'après|voir la|see|See|voir|Voir)\s+/, "")

  globally_replaced = replace_all.reduce(str) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end

  replace_first.reduce(globally_replaced) do |text, (pattern, replacement)|
    text.sub(pattern, replacement)
  end
end
126
+
127
# Resolves the document identifier for a normalized reference and removes
# a trailing ", modified" / ", modifié" marker plus surrounding whitespace.
#
# @param str [String] normalized reference string
# @return [String]
def extract_source_ref(str)
  matched = match_source_ref_string(str)
  matched.sub(/, modifi(ed|é)\Z/, "").strip
end
132
+
133
# Maps a normalized reference string onto the document identifier used for
# the bibliographic lookup. Patterns are tried top to bottom and the first
# one that matches wins, so their order is significant.
#
# @param str [String] normalized reference string
# @return [String] the identifier, or +str+ itself when no pattern matches
#   (in which case the failure is reported through +debug+)
def match_source_ref_string(str)
  case str
  when /SI Brochure/, /Brochure sur le SI/
    # SI Brochure, 9th edition, 2019, 2.3.1
    # SI Brochure, 9th edition, 2019, Appendix 1
    # Brochure sur le SI, 9<sup>e</sup> édition, 2019, Annexe 1
    "BBIPM SI Brochure TEMP DISABLED DUE TO RELATON"

  when /VIM/
    "JCGM VIM"
  # IEC 60050-121, 151-12-05
  when /IEC 60050-(\d+), (\d{2,3}-\d{2,3}-\d{2,3})/
    "IEC 60050-#{$1}"
  when /IEC 60050-(\d+):(\d+), (\d{2,3}-\d{2,3}-\d{2,3})/
    "IEC 60050-#{$1}:#{$2}"
  when /(AIEA|IAEA) (\d+)/
    "IAEA #{$2}"
  when /IEC\sIEEE ([\d\:\-]+)/
    "IEC/IEEE #{$1}".sub(/:\Z/, "")
  when /CISPR ([\d\:\-]+)/
    "IEC CISPR #{$1}"
  when /RR (\d+)/
    "ITU-R RR"
  # IEC 50(845)
  when /IEC (\d+)\((\d+)\)/
    # The part number is the second capture: "IEC 50(845)" is
    # IEC 60050-845 (previously the first capture was used twice).
    "IEC 600#{$1}-#{$2}"
  when /(ISO|IEC)[\/\ ](PAS|TR|TS) ([\d\:\-]+)/
    "#{$1}/#{$2} #{$3}".sub(/:\Z/, "")
  when /ISO\/IEC ([\d\:\-]+)/
    "ISO/IEC #{$1}".sub(/:\Z/, "")
  when /ISO\/IEC\/IEEE ([\d\:\-]+)/
    "ISO/IEC/IEEE #{$1}".sub(/:\Z/, "")

  # ISO 140/4
  when /ISO (\d+)\/(\d+)/
    "ISO #{$1}-#{$2}"
  when /Norme ISO (\d+)-(\d+)/
    "ISO #{$1}:#{$2}"
  when /ISO\/IEC Guide ([\d\:\-]+)/i
    "ISO/IEC Guide #{$1}".sub(/:\Z/, "")
  when /(ISO|IEC) Guide ([\d\:\-]+)/i
    "#{$1} Guide #{$2}".sub(/:\Z/, "")

  # ITU-T Recommendation F.791 (11/2015)
  when /ITU-T Recommendation (\w.\d+) \((\d+\/\d+)\)/i
    "ITU-T Recommendation #{$1} (#{$2})"

  # ITU-T Recommendation F.791:2015
  when /ITU-T Recommendation (\w.\d+):(\d+)/i
    "ITU-T Recommendation #{$1} (#{$2})"

  when /ITU-T Recommendation (\w\.\d+)/i
    "ITU-T Recommendation #{$1}"

  # ITU-R Recommendation 592 MOD
  when /ITU-R Recommendation (\d+)/i
    "ITU-R Recommendation #{$1}"
  # ISO 669: 2000 3.1.16
  when /ISO ([\d\-]+:\s?\d{4})/
    "ISO #{$1}".sub(/:\Z/, "")
  when /ISO ([\d\:\-]+)/
    "ISO #{$1}".sub(/:\Z/, "")
  when /IEC ([\d\:\-]+)/
    "IEC #{$1}".sub(/:\Z/, "")
  when /definition (\d\.[\d\.]+) of ([\d\-]*)/,
       /définition (\d\.[\d\.]+) de la ([\d\-]*)/
    "IEC #{$2}".sub(/:\Z/, "")

  when /IEV (\d{2,3}-\d{2,3}-\d{2,3})/, /(\d{2,3}-\d{2,3}-\d{2,3})/
    "IEV"
  when /IEV part\s+(\d+)/, /partie\s+(\d+)\s+de l'IEV/
    "IEC 60050-#{$1}"

  # The parentheses are literal text and therefore escaped. The French
  # form also accepts "(ITU)", because normalize_ref_string rewrites
  # "UIT" to "ITU" before this method sees the string.
  when /International Telecommunication Union \(ITU\) Constitution/,
       /Constitution de l’Union internationale des télécommunications \((?:UIT|ITU)\)/
    "International Telecommunication Union (ITU) Constitution (Ed. 2015)"
  else
    debug :sources, "Failed to parse source: '#{str}'"
    str
  end
end
214
+
215
# Picks the clause (section number, IEV entry number, figure, annex, ...)
# out of a normalized reference string.
#
# Every pattern below is scanned over the string; each hit is recorded
# together with the offset at which that hit starts. The hit that starts
# earliest wins; among hits starting at the same offset the longest clause
# text wins.
#
# @param str [String] normalized reference string
# @return [String, nil] the clause text, or nil when no pattern matches
def extract_source_clause(str)
  # Strip out the modifications
  str = str.sub(/[,\ ]*modif.+\s[-–].*\Z/, "")

  # Strip these:
  # see figure 466-6
  # voir fig. 4.9
  str = str.gsub(/\A(see|voir) fig. [\d\.]+/, "")
  str = str.gsub(/\A(see|voir) figure [\d\.]+/, "")

  # TODO The second element of each pair is not used anywhere -- need to
  # make sure that no one forgot about something.
  #
  # str = 'ITU-T Recommendation F.791:2015, 3.14,'
  clause_patterns = [
    [/RR (\d+)/, "1"],
    [/VIM (.+)/, "1"],
    [/item (\d\.[\d\.]+)/, "1"],
    [/d[eé]finition (\d[\d\.]+)/, "1"],
    [/figure ([\d\.\-]+)/, "figure 1"],
    [/fig\. ([\d\.\-]+)/, "figure 1"],
    [/IEV (\d{2,3}-\d{2,3}-\d{2,3})/, "1"],
    [/(\d{2,3}-\d{2,3}-\d{2,3})/, "1"],

    # 221 04 03
    [/(\d{3}\ \d{2}\ \d{2})/, "1"],
    # ", 1.1"

    # "SI Brochure, 9th edition, 2019, 2.3.1,"
    [/,\s?(\d+\.[\d\.]+)/, "1"],
    # SI Brochure, 9th edition, 2019, Appendix 1, modified
    # Brochure sur le SI, 9<sup>e</sup> édition, 2019, Annexe 1,
    [/\d{4}, (Appendix \d)/, "1"],
    [/\d{4}, (Annexe \d)/, "1"],

    # International Telecommunication Union (ITU) Constitution (Ed. 2015), No. 1012 of the Annex,
    # Constitution de l’Union internationale des télécommunications (UIT) (Ed. 2015), N° 1012 de l’Annexe,
    [/, (No. \d{4} of the Annex)/, "1"],
    # A stray literal "1012" after \d{4} used to make this pattern
    # unmatchable; it now mirrors the English one above.
    [/, (N° \d{4} de l’Annexe)/, "1"],

    # ISO/IEC 2382:2015 (https://www.iso.org/obp/ui/#iso:std:iso-iec:2382:ed-1:v1:en), 2126371
    [/\), (\d{7}),/, "1"],

    # " 1.1 "
    [/\s(\d+\.[\d\.]+)\s?/, "1"],
    # "ISO/IEC Guide 2 (14.1)"
    [/\((\d+\.[\d\.]+)\)/, "1"],

    # "ISO/IEC Guide 2 (14.5 MOD)"
    [/\((\d+\.[\d\.]+)\ MOD\)/, "1"],

    # ISO 80000-10:2009, item 10-2.b,
    # ISO 80000-10:2009, point 10-2.b,

    [/\AISO 80000-10:2009, (item [\d\.\-]+\w?)/, "1"],
    [/\AISO 80000-10:2009, (point [\d\.\-]+\w?)/, "1"],

    # IEC 80000-13:2008, 13-9,
    [/\AIEC 80000-13:2008, ([\d\.\-]+\w?),/, "1"],
    [/\AIEC 80000-13:2008, ([\d\.\-]+\w?)\Z/, "1"],

    # ISO 921:1997, definition 6,
    # ISO 921:1997, définition 6,
    [/\AISO [\d:]+, (d[ée]finition \d+)/, "1"],

    # "ISO/IEC/IEEE 24765:2010, <i>Systems and software engineering – Vocabulary</i>, 3.234 (2)
    [/, ([\d\.\w]+ \(\d+\))/, "1"],
  ]

  candidates = clause_patterns.flat_map do |regex, _rule|
    found = []
    # The block form of String#scan is required here: only then does the
    # last-match data refer to the hit currently being visited. Reading it
    # after a block-less scan gives every hit the offset of the last one.
    str.scan(regex) do |captures|
      found << {
        index: Regexp.last_match.begin(0),
        clause: captures.first.strip,
      }
    end
    found
  end

  # Earliest hit first; for equal offsets prefer the longer clause.
  best = candidates.min_by { |hit| [hit[:index], -hit[:clause].length] }
  best && best[:clause]
end
304
+
305
# Describes how the term relates to the given source.
#
# The type is decided by the first marker found, in this priority:
# "≠", "≈", see/voir, any of the "modified" markers, otherwise identical
# (this also covers the "from ..." / "definition ... of" forms, which the
# original spelled out as a separate branch with the same result).
#
# NOTE(review): in /^([Ss]ee)|([Vv]oir)/ the "^" belongs to the first
# alternative only, so "voir"/"Voir" is matched anywhere in the string.
#
# @param str [String] raw (not normalized) reference string
# @return [Hash] {"type" => String} plus "modification" when a text
#   follows the modification marker
def extract_source_relationship(str)
  modified_markers = [/MOD/, /ИЗМ/, /modified/, /modifié/]

  type =
    if /≠/.match?(str)
      :not_equal
    elsif /≈/.match?(str)
      :similar
    elsif /^([Ss]ee)|([Vv]oir)/.match?(str)
      :related
    elsif modified_markers.any? { |marker| marker.match?(str) }
      :modified
    else
      :identical
    end

  relationship = { "type" => type.to_s }

  # "MOD 702-01-02": what follows the marker is a reference, not a note.
  return relationship if /^MOD ([\d\-])/.match?(str)

  note = str[/(modified|modifié|modifiée|modifiés|MOD)\s*[–-]?\s+(.+)\Z/, 2]
  return relationship if note.nil?

  relationship["modification"] = IEV::Converter.mathml_to_asciimath(
    parse_anchor_tag(note, @term_domain),
  ).strip
  relationship
end
342
+
343
+ # Uses Relaton to obtain link for given source ref.
344
def obtain_source_link(ref)
  # The fetch may yield nil, in which case there is no link.
  item = RelatonDb.instance.fetch(ref)
  item&.url
end
347
+ end
348
+ end
349
+
350
+ # rubocop:enable Style/RedundantRegexpEscape
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ # (c) Copyright 2020 Ribose Inc.
4
+ #
5
+
6
+ module IEV
7
+ # Parses information from the spreadsheet's REPLACES column.
8
+ #
9
+ # @example
10
+ # SupersessionParser.new(cell_data_string).supersessions
11
# Turns the content of a REPLACES cell into a list of supersession
# relations (at most one entry with the current pattern).
class SupersessionParser
  include CLI::UI
  using DataConversions

  attr_reader :raw_str, :src_str, :supersessions

  # Regular expression which describes IEV relation, for example
  # +881-01-23:1983-01+ or +845-03-55:1987+.
  IEV_SUPERSESSION_RX = %r{
    \A
    (?:IEV\s+)? # some are prefixed with IEV, it is unnecessary though
    (?<ref>\d{3}-\d{2}-\d{2})
    \s* # some have whitespaces around the separator
    : # separator
    \s* # some have whitespaces around the separator
    (?<version>[-0-9]+)
    \Z
  }x.freeze

  # @param source_str [String] raw content of the REPLACES cell
  def initialize(source_str)
    @raw_str = source_str.dup.freeze
    @src_str = @raw_str.sanitize.freeze
    @supersessions = parse
  end

  private

  # @return [Array<Hash>, nil] a one-element list for a well-formed cell;
  #   nil for an empty cell, or (after a warning) for a malformed one
  def parse
    return if empty_source?

    match = IEV_SUPERSESSION_RX.match(src_str)
    return [relation_from_match(match)] if match

    warn "Incorrect supersession: '#{src_str}'"
    nil
  end

  # True when the sanitized text contains no word character at all.
  def empty_source?
    (/\w/ =~ src_str).nil?
  end

  # Wraps the referenced entry into a "supersedes" relation.
  def relation_from_match(match_data)
    reference = iev_ref_from_match(match_data)
    { "type" => "supersedes", "ref" => reference }
  end

  # Builds the reference hash from the named captures of the match.
  def iev_ref_from_match(match_data)
    entry_id = match_data[:ref]
    entry_version = match_data[:version]
    { "source" => "IEV", "id" => entry_id, "version" => entry_version }
  end
end
70
+ end