iev 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -1
- data/.github/workflows/release.yml +25 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +0 -2
- data/README.adoc +4 -4
- data/exe/iev-glossarist +21 -0
- data/iev.gemspec +12 -3
- data/lib/iev/cli/command.rb +109 -0
- data/lib/iev/cli/command_helper.rb +83 -0
- data/lib/iev/cli/ui.rb +70 -0
- data/lib/iev/cli.rb +22 -0
- data/lib/iev/converter/mathml_to_asciimath.rb +197 -0
- data/lib/iev/converter.rb +9 -0
- data/lib/iev/data_conversions.rb +39 -0
- data/lib/iev/db.rb +3 -3
- data/lib/iev/db_cache.rb +2 -2
- data/lib/iev/db_writer.rb +81 -0
- data/lib/iev/iso_639_2.yaml +4075 -0
- data/lib/iev/iso_639_code.rb +47 -0
- data/lib/iev/profiler.rb +69 -0
- data/lib/iev/relaton_db.rb +63 -0
- data/lib/iev/source_parser.rb +350 -0
- data/lib/iev/supersession_parser.rb +70 -0
- data/lib/iev/term_attrs_parser.rb +143 -0
- data/lib/iev/term_builder.rb +313 -0
- data/lib/iev/utilities.rb +58 -0
- data/lib/iev/version.rb +2 -2
- data/lib/iev.rb +24 -2
- metadata +153 -10
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# (c) Copyright 2020 Ribose Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
module IEV
  # Resolves ISO 639 language entries from a two-letter (ISO 639-1) code using
  # the +iso_639_2.yaml+ table that sits next to this file.
  #
  # @todo This needs to be rewritten.
  class Iso639Code
    # Parsed YAML table. Each value is read with the key "iso_639_1" and with
    # the requested code type (for example "terminology").
    COUNTRY_CODES = YAML.load(File.read(File.join(__dir__, "iso_639_2.yaml")))

    # Cache used by {.three_char_code}; keys are +[two_char_code, code_type]+
    # pairs. It is written to at runtime, so it is deliberately not frozen.
    THREE_CHAR_MEMO = {}

    # @param two_char_code [String] language code. Anything that is not exactly
    #   two characters long is cut at the first space; this is to handle code
    #   "nl BE" in the IEV sheet.
    def initialize(two_char_code)
      @code =
        if two_char_code.length == 2
          two_char_code
        else
          two_char_code.split(" ").first
        end
    end

    # Searches the table for the first entry whose "iso_639_1" value equals the
    # stored code and which has a truthy value under +code_type+.
    #
    # @param code_type [String] key that must be present in the entry
    # @return [Object] the entry exactly as returned by +detect+ (a
    #   +[key, value]+ pair if the table is a Hash -- presumably it is)
    # @raise [StandardError] when no entry matches
    def find(code_type)
      wanted = @code.to_s
      entry = country_codes.detect do |key, attrs|
        attrs["iso_639_1"] == wanted && attrs[code_type] && key
      end
      raise StandardError, "Iso639Code not found for '#{@code}'!" if entry.nil?

      entry
    end

    # Memoized shortcut for +new(two_char_code).find(code_type)+.
    #
    # @param two_char_code [String]
    # @param code_type [String]
    # @return [Object] see {#find}
    def self.three_char_code(two_char_code, code_type = "terminology")
      THREE_CHAR_MEMO[[two_char_code, code_type]] ||=
        new(two_char_code).find(code_type)
    end

    private

    # @return [Object] the table loaded into COUNTRY_CODES
    def country_codes
      COUNTRY_CODES
    end
  end
end
|
47
|
+
|
data/lib/iev/profiler.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# (c) Copyright 2020 Ribose Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
module IEV
  # Runs a block, optionally under RubyProf and Benchmark, and writes the
  # collected reports to files inside +dir+.
  #
  # Profiling only happens when the global +$IEV_PROFILE+ is truthy; otherwise
  # the block is simply called.
  class Profiler
    attr_reader :bench, :dir, :prefix, :profile

    # Convenience wrapper: builds a profiler with +prefix+ and runs the block.
    #
    # @param prefix [String, nil] prefix for the report file names
    # @return [Object] the block's return value
    def self.measure(prefix = nil, &block)
      new(prefix).run(&block)
    end

    # @param prefix [String, nil] prefix for the report file names
    # @param dir [String] directory that receives the reports
    def initialize(prefix, dir: "profile")
      @dir = dir
      @prefix = prefix
    end

    # Calls the block, with profiling when it is enabled.
    #
    # @return [Object] the block's return value
    def run(&block)
      return run!(&block) if profiler_enabled?

      block.call
    end

    # Calls the block under RubyProf and Benchmark unconditionally. The reports
    # are written from the +ensure+ clause, so they are also attempted when the
    # block raises.
    #
    # @return [Object] the block's return value
    def run!(&block)
      result = nil
      @profile = RubyProf.profile(allow_exceptions: true) do
        @bench = Benchmark.measure { result = block.call }
      end
      result
    ensure
      print_reports
    end

    # @return [Object] the current value of the +$IEV_PROFILE+ global
    def profiler_enabled?
      $IEV_PROFILE
    end

    private

    # Creates +dir+ if needed and writes every report.
    def print_reports
      FileUtils.mkdir_p(dir)
      print_benchmark("bench.txt")
      print_profile("flat.txt", RubyProf::FlatPrinter)
      print_profile("graph.html", RubyProf::GraphHtmlPrinter)
      print_profile("calls.html", RubyProf::CallStackPrinter)
    end

    # Writes the benchmark caption and measurement; does nothing when no
    # measurement was recorded.
    def print_benchmark(suffix)
      return if bench.nil?

      File.write(report_file_name(suffix), "#{Benchmark::CAPTION}\n#{bench}")
    end

    # Writes the profile using the given printer class; does nothing when no
    # profile was recorded.
    def print_profile(suffix, printer)
      return if profile.nil?

      File.open(report_file_name(suffix), "w") do |io|
        printer.new(profile).print(io)
      end
    end

    # @return [String] absolute path of "<prefix>-<suffix>" (or just the suffix
    #   when the prefix is nil) resolved against +dir+
    def report_file_name(suffix)
      File.expand_path([prefix, suffix].compact.join("-"), dir)
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# (c) Copyright 2020 Ribose Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
require "singleton"
|
7
|
+
|
8
|
+
module IEV
  # Relaton cache singleton.
  class RelatonDb
    include Singleton
    include CLI::UI

    # Announces the start-up and opens the Relaton database named "db".
    def initialize
      info "Initializing Relaton..."
      @db = Relaton::Db.new("db", nil)
    end

    # Fetches a reference through Relaton, retrying on failures and keeping
    # anything written to stdout/stderr during the call out of the console.
    #
    # @param code [String] reference
    # @return [Object] whatever +Relaton::Db#fetch+ returns (the previous
    #   annotation named RelatonIso::IsoBibliographicItem -- not verifiable
    #   from this file)
    def fetch(code)
      retrying_on_failures do
        capture_output_streams { @db.fetch(code) }
      end
    end

    private

    # Yields, and on any StandardError sleeps and yields again. Up to
    # +attempts+ retries are made after the first failure, with delays of
    # 0.2 s, 0.4 s, 0.8 s, ... ; the next failure is re-raised.
    def retrying_on_failures(attempts: 4)
      failures = 0

      begin
        yield
      rescue
        raise if failures >= attempts

        failures += 1
        sleep(2**failures * 0.1)
        retry
      end
    end

    # Redirects $stdout and $stderr into one StringIO while the block runs,
    # restores them afterwards and forwards the captured text to +debug+ when
    # anything was written.
    def capture_output_streams
      saved_stdout = $stdout
      saved_stderr = $stderr
      captured = StringIO.new
      $stdout = captured
      $stderr = captured

      begin
        yield
      ensure
        $stdout = saved_stdout
        $stderr = saved_stderr
        debug(:relaton, captured.string) if captured.pos.positive?
      end
    end
  end
end
|
@@ -0,0 +1,350 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# (c) Copyright 2020 Ribose Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
# rubocop:todo Style/RedundantRegexpEscape
|
7
|
+
|
8
|
+
module IEV
|
9
|
+
# Parses information from the spreadsheet's SOURCE column.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
#   SourceParser.new(cell_data_string, term_domain).parsed_sources
|
13
|
+
class SourceParser
|
14
|
+
include CLI::UI
|
15
|
+
include Utilities
|
16
|
+
using DataConversions
|
17
|
+
|
18
|
+
attr_reader :src_split, :parsed_sources, :raw_str, :src_str
|
19
|
+
|
20
|
+
# Stores the raw cell text, derives the cleaned-up text from it and parses it
# straight away.
#
# @param source_str [String] contents of the SOURCE cell
# @param term_domain [Object] passed on to +parse_anchor_tag+ when sources are
#   extracted
def initialize(source_str, term_domain)
  @term_domain = term_domain
  @raw_str = source_str.dup.freeze
  # decode_html and sanitize are presumably supplied by the DataConversions
  # refinement -- confirm there.
  @src_str = @raw_str.decode_html.sanitize.freeze
  parse
end
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
# Splits the cleaned source text into individual references and extracts the
# details of each of them. Fills +src_split+ and +parsed_sources+.
def parse
  fragments = split_source_field(src_str)
  @src_split = fragments
  @parsed_sources = fragments.map { |fragment| extract_single_source(fragment) }
end
|
33
|
+
|
34
|
+
# Splits a SOURCE field that lists several references into one string per
# reference.
#
# Each rewrite below inserts a ";;" marker at a reference boundary; the text is
# then split on that marker and every piece is stripped. The rewrites run in
# the listed order.
#
# @param source [String]
# @return [Array<String>]
def split_source_field(source)
  # TODO Calling String#gsub with a single hash argument would be probably
  # better than calling that method multiple times.  But change is
  # not necessarily that easy to do.
  boundary_rewrites = [
    # IEC 62047-22:2014, 3.1.1, modified – In the definition, ...
    [/;\s?([A-Z][A-Z])/, ';; \1'],
    [/MOD[,\.]/, "MOD;;"],

    # 702-01-02 MOD,ITU-R Rec. 431 MOD
    # 161-06-01 MOD. ITU RR 139 MOD
    # (the MOD rules from here on cannot fire any more because the rule above
    # has already consumed every "MOD," and "MOD."; kept for parity)
    [/MOD,\s*([UIC\d])/, 'MOD;; \1'],
    [/MOD[,\.]/, "MOD;;"],

    # 702-09-44 MOD, 723-07-47, voir 723-10-91
    [/MOD,\s*(\d{3})/, 'MOD;; \1'],
    [/,\s*see\s*(\d{3})/, ';;see \1'],
    [/,\s*voir\s*(\d{3})/, ';;voir \1'],

    # IEC 62303:2008, 3.1, modified and IEC 62302:2007, 3.2; IAEA 4
    # CEI 62303:2008, 3.1, modifiée et CEI 62302:2007, 3.2; AIEA 4
    [/modified and ([ISOECUT])/, 'modified;; \1'],
    [/modifiée et ([ISOECUT])/, 'modifiée;; \1'],

    # 725-12-50, ITU RR 11
    [/,\s+ITU/, ";; ITU"],

    # 705-02-01, 702-02-07
    [/(\d{2,3}-\d{2,3}-\d{2,3}),\s*(\d{2,3}-\d{2,3}-\d{2,3})/, '\1;; \2'],
  ]

  marked = boundary_rewrites.reduce(source) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end

  marked.split(";;").map(&:strip)
end
|
70
|
+
|
71
|
+
# Builds the description of one source reference.
#
# The relationship and the "original" text are derived from the raw string;
# the reference id and the clause are derived from the normalized string.
#
# @param raw_ref [String] one element of +src_split+
# @return [Hash] with the keys "ref", "clause", "link", "relationship" and
#   "original"; keys whose value is nil are dropped. When Relaton raises
#   +RelatonBib::RequestError+ the message is passed to +warn+ and the method
#   returns whatever +warn+ returns.
#   NOTE(review): confirm callers cope with that value inside parsed_sources.
def extract_single_source(raw_ref)
  relationship = extract_source_relationship(raw_ref)
  normalized = normalize_ref_string(raw_ref)
  ref = extract_source_ref(normalized)
  clause = extract_source_clause(normalized)
  link = obtain_source_link(ref)
  original = IEV::Converter.mathml_to_asciimath(
    parse_anchor_tag(raw_ref, @term_domain),
  )

  {
    "ref" => ref,
    "clause" => clause,
    "link" => link,
    "relationship" => relationship,
    "original" => original,
  }.compact
rescue ::RelatonBib::RequestError => e
  warn e.message
end
|
89
|
+
|
90
|
+
# Rewrites a raw reference into a uniform English spelling: French and
# misspelled organisation acronyms are replaced, the "≈"/"≠" markers are
# removed and "definition X of Y MOD" phrasings are turned into
# "IEC Y, X, modified - ".
#
# Inputs this is meant for include:
#   définition 3.60 de la 62127-1
#   definition 3.60 of 62127-1
#   definition 3.7 of IEC 62127-1 MOD, adapted from 4.2.9 of IEC 61828 and 3.6 of IEC 61102
#   définition 3.7 de la CEI 62127-1 MOD, adaptées sur la base du 4.2.9 de la CEI 61828 et du 3.6 de la CEI 61102
#   definition 3.54 of 62127-1 MOD
#   définition 3.54 de la CEI 62127-1 MOD
#   IEC 62313:2009, 3.6, modified
#   IEC 62313:2009, 3.6, modifié
#
# @param str [String]
# @return [String]
def normalize_ref_string(str)
  # rubocop:todo Layout/LineLength

  # Applied to every occurrence, in this order.
  everywhere = [
    [/CEI/, "IEC"],
    [/Guide IEC/, "IEC Guide"],
    [/Guide ISO\/IEC/, "ISO/IEC Guide"],
    [/VEI/, "IEV"],
    [/UIT/, "ITU"],
    [/IUT-R/, "ITU-R"],
    [/UTI-R/, "ITU-R"],
    [/Recomm[ea]ndation ITU-T/, "ITU-T Recommendation"],
    [/ITU-T (\w.\d{3}):(\d{4})/, 'ITU-T Recommendation \1 (\2)'],
    [/ITU-R Rec. (\d+)/, 'ITU-R Recommendation \1'],
    [/[≈≠]\s+/, ""],
  ]

  # Applied afterwards, to the first occurrence only.
  first_hit_only = [
    [/ИЗМ\Z/, "MOD"],
    [/definition ([\d\.]+) of ([\d\-\:]+) MOD/, 'IEC \2, \1, modified - '],
    [/definition ([\d\.]+) of IEC ([\d\-\:]+) MOD/, 'IEC \2, \1, modified - '],
    [/définition ([\d\.]+) de la ([\d\-\:]+) MOD/, 'IEC \2, \1, modified - '],
    [/définition ([\d\.]+) de la IEC ([\d\-\:]+) MOD/, 'IEC \2, \1, modified - '],
    [/(\d{3})\ (\d{2})\ (\d{2})/, '\1-\2-\3'], # for 221 04 03
  ]

  # .sub(/\A(from|d'après|voir la|see|See|voir|Voir)\s+/, "")

  replaced = everywhere.reduce(str) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end

  first_hit_only.reduce(replaced) do |text, (pattern, replacement)|
    text.sub(pattern, replacement)
  end

  # rubocop:enable Layout/LineLength
end
|
126
|
+
|
127
|
+
# Returns the canonical reference for a normalized source string, without a
# trailing ", modified" / ", modifié" and without surrounding whitespace.
#
# @param str [String] normalized source string
# @return [String]
def extract_source_ref(str)
  matched = match_source_ref_string(str)
  without_modified_marker = matched.sub(/, modifi(ed|é)\Z/, "")
  without_modified_marker.strip
end
|
132
|
+
|
133
|
+
# Maps a source string onto a canonical reference identifier.
#
# The patterns are tried from top to bottom and the first one that matches
# wins, so the specific patterns have to stay above the generic "ISO ..." and
# "IEC ..." ones.
#
# @param str [String] source string; the visible caller passes the output of
#   +normalize_ref_string+, i.e. "CEI"/"UIT" have already become "IEC"/"ITU"
# @return [String] the canonical reference, or +str+ itself when nothing
#   matches (a debug message is emitted in that case)
def match_source_ref_string(str)
  case str
  when /SI Brochure/, /Brochure sur le SI/
    # SI Brochure, 9th edition, 2019, 2.3.1
    # SI Brochure, 9th edition, 2019, Appendix 1
    # Brochure sur le SI, 9<sup>e</sup> édition, 2019, Annexe 1
    "BBIPM SI Brochure TEMP DISABLED DUE TO RELATON"

  when /VIM/
    "JCGM VIM"
  # IEC 60050-121, 151-12-05
  when /IEC 60050-(\d+), (\d{2,3}-\d{2,3}-\d{2,3})/
    "IEC 60050-#{$1}"
  when /IEC 60050-(\d+):(\d+), (\d{2,3}-\d{2,3}-\d{2,3})/
    "IEC 60050-#{$1}:#{$2}"
  when /(AIEA|IAEA) (\d+)/
    "IAEA #{$2}"
  when /IEC\sIEEE ([\d\:\-]+)/
    "IEC/IEEE #{$1}".sub(/:\Z/, "")
  when /CISPR ([\d\:\-]+)/
    "IEC CISPR #{$1}"
  when /RR (\d+)/
    "ITU-R RR"
  # IEC 50(845)
  when /IEC (\d+)\((\d+)\)/
    # The number in parentheses is the part number: IEC 50(845) is
    # IEC 60050-845. (Previously the first capture was used twice.)
    "IEC 600#{$1}-#{$2}"
  when /(ISO|IEC)[\/\ ](PAS|TR|TS) ([\d\:\-]+)/
    "#{$1}/#{$2} #{$3}".sub(/:\Z/, "")
  when /ISO\/IEC ([\d\:\-]+)/
    "ISO/IEC #{$1}".sub(/:\Z/, "")
  when /ISO\/IEC\/IEEE ([\d\:\-]+)/
    "ISO/IEC/IEEE #{$1}".sub(/:\Z/, "")

  # ISO 140/4
  when /ISO (\d+)\/(\d+)/
    "ISO #{$1}-#{$2}"
  when /Norme ISO (\d+)-(\d+)/
    "ISO #{$1}:#{$2}"
  when /ISO\/IEC Guide ([\d\:\-]+)/i
    "ISO/IEC Guide #{$1}".sub(/:\Z/, "")
  when /(ISO|IEC) Guide ([\d\:\-]+)/i
    "#{$1} Guide #{$2}".sub(/:\Z/, "")

  # ITU-T Recommendation F.791 (11/2015)
  when /ITU-T Recommendation (\w.\d+) \((\d+\/\d+)\)/i
    "ITU-T Recommendation #{$1} (#{$2})"

  # ITU-T Recommendation F.791:2015
  when /ITU-T Recommendation (\w.\d+):(\d+)/i
    "ITU-T Recommendation #{$1} (#{$2})"

  when /ITU-T Recommendation (\w\.\d+)/i
    "ITU-T Recommendation #{$1}"

  # ITU-R Recommendation 592 MOD
  when /ITU-R Recommendation (\d+)/i
    "ITU-R Recommendation #{$1}"
  # ISO 669: 2000 3.1.16
  when /ISO ([\d\-]+:\s?\d{4})/
    "ISO #{$1}".sub(/:\Z/, "")
  when /ISO ([\d\:\-]+)/
    "ISO #{$1}".sub(/:\Z/, "")
  when /IEC ([\d\:\-]+)/
    "IEC #{$1}".sub(/:\Z/, "")
  when /definition (\d\.[\d\.]+) of ([\d\-]*)/,
       /définition (\d\.[\d\.]+) de la ([\d\-]*)/
    "IEC #{$2}".sub(/:\Z/, "")

  when /IEV (\d{2,3}-\d{2,3}-\d{2,3})/, /(\d{2,3}-\d{2,3}-\d{2,3})/
    "IEV"
  when /IEV part\s+(\d+)/, /partie\s+(\d+)\s+de l'IEV/
    "IEC 60050-#{$1}"

  # The parentheses are literal text here and must be escaped; unescaped they
  # form a group and the pattern never matches "(ITU)". The French title
  # accepts both acronyms because normalization turns "UIT" into "ITU".
  when /International Telecommunication Union \(ITU\) Constitution/,
       /Constitution de l’Union internationale des télécommunications \((?:ITU|UIT)\)/
    "International Telecommunication Union (ITU) Constitution (Ed. 2015)"
  else
    debug :sources, "Failed to parse source: '#{str}'"
    str
  end
end
|
214
|
+
|
215
|
+
# Extracts the clause (section number, figure, IEV entry number, ...) that a
# normalized source string points at.
#
# Every pattern in the table is scanned over the string; each hit is recorded
# with the offset at which it starts. The hit that starts earliest wins, and
# among hits starting at the same offset the one with the longest clause.
#
# @param str [String] normalized source string
# @return [String, nil] the clause, or nil when no pattern matches
def extract_source_clause(str)
  # rubocop:todo Layout/LineLength

  # Strip out the modifications
  str = str.sub(/[,\ ]*modif.+\s[-–].*\Z/, "")

  # Strip these:
  # see figure 466-6
  # voir fig. 4.9
  str = str.gsub(/\A(see|voir) fig. [\d\.]+/, "")
  str = str.gsub(/\A(see|voir) figure [\d\.]+/, "")

  # str = 'ITU-T Recommendation F.791:2015, 3.14,'
  candidates = [
    [/RR (\d+)/, "1"],
    [/VIM (.+)/, "1"],
    [/item (\d\.[\d\.]+)/, "1"],
    [/d[eé]finition (\d[\d\.]+)/, "1"],
    [/figure ([\d\.\-]+)/, "figure 1"],
    [/fig\. ([\d\.\-]+)/, "figure 1"],
    [/IEV (\d{2,3}-\d{2,3}-\d{2,3})/, "1"],
    [/(\d{2,3}-\d{2,3}-\d{2,3})/, "1"],

    # 221 04 03
    [/(\d{3}\ \d{2}\ \d{2})/, "1"],
    # ", 1.1"

    # "SI Brochure, 9th edition, 2019, 2.3.1,"
    [/,\s?(\d+\.[\d\.]+)/, "1"],
    # SI Brochure, 9th edition, 2019, Appendix 1, modified
    # Brochure sur le SI, 9<sup>e</sup> édition, 2019, Annexe 1,
    [/\d{4}, (Appendix \d)/, "1"],
    [/\d{4}, (Annexe \d)/, "1"],

    # International Telecommunication Union (ITU) Constitution (Ed. 2015), No. 1012 of the Annex,
    # Constitution de l’Union internationale des télécommunications (UIT) (Ed. 2015), N° 1012 de l’Annexe,
    [/, (No. \d{4} of the Annex)/, "1"],
    # The French pattern used to contain a stray literal "1012" after the
    # number, which made it unmatchable.
    [/, (N° \d{4} de l’Annexe)/, "1"],

    # ISO/IEC 2382:2015 (https://www.iso.org/obp/ui/#iso:std:iso-iec:2382:ed-1:v1:en), 2126371
    [/\), (\d{7}),/, "1"],

    # " 1.1 "
    [/\s(\d+\.[\d\.]+)\s?/, "1"],
    # "ISO/IEC Guide 2 (14.1)"
    [/\((\d+\.[\d\.]+)\)/, "1"],

    # "ISO/IEC Guide 2 (14.5 MOD)"
    [/\((\d+\.[\d\.]+)\ MOD\)/, "1"],

    # ISO 80000-10:2009, item 10-2.b,
    # ISO 80000-10:2009, point 10-2.b,

    [/\AISO 80000-10:2009, (item [\d\.\-]+\w?)/, "1"],
    [/\AISO 80000-10:2009, (point [\d\.\-]+\w?)/, "1"],

    # IEC 80000-13:2008, 13-9,
    [/\AIEC 80000-13:2008, ([\d\.\-]+\w?),/, "1"],
    [/\AIEC 80000-13:2008, ([\d\.\-]+\w?)\Z/, "1"],

    # ISO 921:1997, definition 6,
    # ISO 921:1997, définition 6,
    [/\AISO [\d:]+, (d[ée]finition \d+)/, "1"],

    # "ISO/IEC/IEEE 24765:2010, <i>Systems and software engineering – Vocabulary</i>, 3.234 (2)
    [/, ([\d\.\w]+ \(\d+\))/, "1"],
  ].flat_map do |regex, _rule|
    # TODO The second element of each pair is not used -- need to make sure
    # that no one forgot about something.
    hits = []
    # The block form of scan is required: it sets the last-match data for each
    # hit, whereas after a block-less scan only the final hit is available and
    # every hit would be recorded with that same offset.
    str.scan(regex) do |captures|
      hits << {
        index: Regexp.last_match.begin(0),
        clause: captures.first.strip,
      }
    end
    hits
  end

  # earliest hit first; at equal offsets prefer the longest clause
  best = candidates.min_by { |hit| [hit[:index], -hit[:clause].length] }
  best && best[:clause]

  # rubocop:enable Layout/LineLength
end
|
304
|
+
|
305
|
+
# Determines how the term relates to the given source and, for modified
# sources, what the stated modification is.
#
# The type is decided by the first rule that applies:
#   "≠" anywhere                              -> not_equal
#   "≈" anywhere                              -> similar
#   a line starting with See/see/Voir/voir    -> related
#   MOD, ИЗМ, modified or modifié anywhere    -> modified
#   anything else                             -> identical
#
# @param str [String] raw (not normalized) source string
# @return [Hash] +{"type" => String}+, plus a "modification" key holding the
#   text that follows the modification marker when there is such text
def extract_source_relationship(str)
  type =
    case str
    when /≠/
      :not_equal
    when /≈/
      :similar
    # Both alternatives have to be anchored. The previous pattern,
    # /^([Ss]ee)|([Vv]oir)/, anchored only "see" and therefore fired on "voir"
    # anywhere in the text, e.g. inside "avoir".
    when /^(?:[Ss]ee|[Vv]oir)/
      :related
    when /MOD/, /ИЗМ/, /modified/, /modifié/
      :modified
    else
      # Also covers "from ...", "d'après ...", "definition X of ..." and
      # "définition X de la ...".
      :identical
    end

  relationship = { "type" => type.to_s }

  # "MOD 702-01-02": what follows MOD is a reference, not a description of the
  # modification.
  return relationship if /^MOD ([\d\-])/.match?(str)

  modification =
    /(modified|modifié|modifiée|modifiés|MOD)\s*[–-]?\s+(.+)\Z/.match(str)

  if modification
    relationship["modification"] = IEV::Converter.mathml_to_asciimath(
      parse_anchor_tag(modification[2], @term_domain),
    ).strip
  end

  relationship
end
|
342
|
+
|
343
|
+
# Looks the reference up through the RelatonDb singleton and returns the +url+
# of the fetched item.
#
# @param ref [String] reference identifier
# @return [Object, nil] nil when the lookup returns nil
def obtain_source_link(ref)
  bib_item = RelatonDb.instance.fetch(ref)
  bib_item&.url
end
|
347
|
+
end
|
348
|
+
end
|
349
|
+
|
350
|
+
# rubocop:enable Style/RedundantRegexpEscape
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# (c) Copyright 2020 Ribose Inc.
|
4
|
+
#
|
5
|
+
|
6
|
+
module IEV
  # Parses information from the spreadsheet's REPLACES column.
  #
  # @example
  #   SupersessionParser.new(cell_data_string).supersessions
  class SupersessionParser
    include CLI::UI
    using DataConversions

    # +raw_str+ is a frozen copy of the cell text, +src_str+ its sanitized
    # form, and +supersessions+ the parse result (nil, or an Array holding one
    # relation Hash).
    attr_reader :raw_str, :src_str, :supersessions

    # Regular expression which describes IEV relation, for example
    # +881-01-23:1983-01+ or +845-03-55:1987+.
    IEV_SUPERSESSION_RX = %r{
      \A
      (?:IEV\s+)? # some are prefixed with IEV, it is unnecessary though
      (?<ref>\d{3}-\d{2}-\d{2})
      \s* # some have whitespaces around the separator
      : # separator
      \s* # some have whitespaces around the separator
      (?<version>[-0-9]+)
      \Z
    }x.freeze

    # @param source_str [String] contents of the REPLACES cell
    def initialize(source_str)
      @raw_str = source_str.dup.freeze
      # sanitize is presumably supplied by the DataConversions refinement.
      @src_str = @raw_str.sanitize.freeze
      @supersessions = parse
    end

    private

    # @return [Array<Hash>, nil] a one-element list for a well-formed cell;
    #   nil for an empty cell, and nil after a warning for a malformed one
    def parse
      return if empty_source?

      match = IEV_SUPERSESSION_RX.match(src_str)
      return [relation_from_match(match)] if match

      warn "Incorrect supersession: '#{src_str}'"
      nil
    end

    # @return [Boolean] true when the sanitized text has no word character
    def empty_source?
      !/\w/.match?(src_str)
    end

    # @param match_data [MatchData] match of IEV_SUPERSESSION_RX
    # @return [Hash] a "supersedes" relation pointing at the matched entry
    def relation_from_match(match_data)
      { "type" => "supersedes", "ref" => iev_ref_from_match(match_data) }
    end

    # @param match_data [MatchData] match of IEV_SUPERSESSION_RX
    # @return [Hash] the IEV entry id and version taken from the named groups
    def iev_ref_from_match(match_data)
      {
        "source" => "IEV",
        "id" => match_data[:ref],
        "version" => match_data[:version],
      }
    end
  end
end
|