iev 0.3.4 → 0.3.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +0 -4
- data/.github/workflows/release.yml +1 -4
- data/.rubocop.yml +1 -1
- data/.rubocop_todo.yml +81 -0
- data/Gemfile +11 -3
- data/README.adoc +276 -21
- data/Rakefile +2 -0
- data/bin/console +1 -0
- data/exe/iev +7 -0
- data/iev.gemspec +15 -19
- data/lib/iev/cli/command.rb +38 -38
- data/lib/iev/cli/command_helper.rb +13 -11
- data/lib/iev/cli/ui.rb +5 -5
- data/lib/iev/cli.rb +5 -5
- data/lib/iev/converter/mathml_to_asciimath.rb +75 -77
- data/lib/iev/converter.rb +2 -2
- data/lib/iev/data_conversions.rb +5 -5
- data/lib/iev/db.rb +5 -3
- data/lib/iev/db_cache.rb +7 -5
- data/lib/iev/db_writer.rb +3 -2
- data/lib/iev/iso_639_code.rb +8 -12
- data/lib/iev/profiler.rb +7 -7
- data/lib/iev/relaton_db.rb +8 -12
- data/lib/iev/source_parser.rb +60 -62
- data/lib/iev/supersession_parser.rb +7 -8
- data/lib/iev/term_attrs_parser.rb +22 -23
- data/lib/iev/term_builder.rb +16 -19
- data/lib/iev/utilities.rb +40 -40
- data/lib/iev/version.rb +4 -2
- data/lib/iev.rb +5 -8
- metadata +25 -94
- data/exe/iev-glossarist +0 -21
data/lib/iev/source_parser.rb
CHANGED
@@ -5,13 +5,14 @@
|
|
5
5
|
|
6
6
|
# rubocop:todo Style/RedundantRegexpEscape
|
7
7
|
|
8
|
-
|
8
|
+
require "English"
|
9
|
+
module Iev
|
9
10
|
# Parses information from the spreadsheet's SOURCE column.
|
10
11
|
#
|
11
12
|
# @example
|
12
13
|
# SourceParser.new(cell_data_string).parsed_sources
|
13
14
|
class SourceParser
|
14
|
-
include
|
15
|
+
include Cli::Ui
|
15
16
|
include Utilities
|
16
17
|
using DataConversions
|
17
18
|
|
@@ -32,7 +33,7 @@ module IEV
|
|
32
33
|
end
|
33
34
|
|
34
35
|
def split_source_field(source)
|
35
|
-
# TODO Calling String#gsub with a single hash argument would be probably
|
36
|
+
# TODO: Calling String#gsub with a single hash argument would be probably
|
36
37
|
# better than calling that method multiple times. But change is
|
37
38
|
# not necessarily that easy to do.
|
38
39
|
|
@@ -63,7 +64,9 @@ module IEV
|
|
63
64
|
source = source.gsub(/,\s+ITU/, ";; ITU")
|
64
65
|
|
65
66
|
# 705-02-01, 702-02-07
|
66
|
-
source = source.gsub(
|
67
|
+
source = source.gsub(
|
68
|
+
/(\d{2,3}-\d{2,3}-\d{2,3}),\s*(\d{2,3}-\d{2,3}-\d{2,3})/, '\1;; \2'
|
69
|
+
)
|
67
70
|
|
68
71
|
source.split(";;").map(&:strip)
|
69
72
|
end
|
@@ -79,7 +82,7 @@ module IEV
|
|
79
82
|
"clause" => clause,
|
80
83
|
"link" => obtain_source_link(source_ref),
|
81
84
|
"relationship" => relation_type,
|
82
|
-
"original" =>
|
85
|
+
"original" => Iev::Converter.mathml_to_asciimath(
|
83
86
|
parse_anchor_tag(raw_ref, @term_domain),
|
84
87
|
),
|
85
88
|
}.compact
|
@@ -88,8 +91,6 @@ module IEV
|
|
88
91
|
end
|
89
92
|
|
90
93
|
def normalize_ref_string(str)
|
91
|
-
# rubocop:todo Layout/LineLength
|
92
|
-
|
93
94
|
# définition 3.60 de la 62127-1
|
94
95
|
# definition 3.60 of 62127-1
|
95
96
|
# définition 3.60 de la 62127-1
|
@@ -103,7 +104,7 @@ module IEV
|
|
103
104
|
str
|
104
105
|
.gsub(/CEI/, "IEC")
|
105
106
|
.gsub(/Guide IEC/, "IEC Guide")
|
106
|
-
.gsub(
|
107
|
+
.gsub(%r{Guide ISO/IEC}, "ISO/IEC Guide")
|
107
108
|
.gsub(/VEI/, "IEV")
|
108
109
|
.gsub(/UIT/, "ITU")
|
109
110
|
.gsub(/IUT-R/, "ITU-R")
|
@@ -120,8 +121,6 @@ module IEV
|
|
120
121
|
.sub(/(\d{3})\ (\d{2})\ (\d{2})/, '\1-\2-\3') # for 221 04 03
|
121
122
|
|
122
123
|
# .sub(/\A(from|d'après|voir la|see|See|voir|Voir)\s+/, "")
|
123
|
-
|
124
|
-
# rubocop:enable Layout/LineLength
|
125
124
|
end
|
126
125
|
|
127
126
|
def extract_source_ref(str)
|
@@ -142,66 +141,69 @@ module IEV
|
|
142
141
|
"JCGM VIM"
|
143
142
|
# IEC 60050-121, 151-12-05
|
144
143
|
when /IEC 60050-(\d+), (\d{2,3}-\d{2,3}-\d{2,3})/
|
145
|
-
"IEC 60050-#{
|
144
|
+
"IEC 60050-#{::Regexp.last_match(1)}"
|
146
145
|
when /IEC 60050-(\d+):(\d+), (\d{2,3}-\d{2,3}-\d{2,3})/
|
147
|
-
"IEC 60050-#{
|
146
|
+
"IEC 60050-#{::Regexp.last_match(1)}:#{::Regexp.last_match(2)}"
|
148
147
|
when /(AIEA|IAEA) (\d+)/
|
149
|
-
"IAEA #{
|
148
|
+
"IAEA #{::Regexp.last_match(2)}"
|
150
149
|
when /IEC\sIEEE ([\d\:\-]+)/
|
151
|
-
"IEC/IEEE #{
|
150
|
+
"IEC/IEEE #{::Regexp.last_match(1)}".sub(/:\Z/, "")
|
152
151
|
when /CISPR ([\d\:\-]+)/
|
153
|
-
"IEC CISPR #{
|
152
|
+
"IEC CISPR #{::Regexp.last_match(1)}"
|
154
153
|
when /RR (\d+)/
|
155
154
|
"ITU-R RR"
|
156
155
|
# IEC 50(845)
|
157
156
|
when /IEC (\d+)\((\d+)\)/
|
158
|
-
"IEC 600#{
|
159
|
-
when
|
160
|
-
"#{
|
161
|
-
|
162
|
-
|
163
|
-
when /
|
164
|
-
"ISO/IEC
|
157
|
+
"IEC 600#{::Regexp.last_match(1)}-#{::Regexp.last_match(1)}"
|
158
|
+
when %r{(ISO|IEC)[/\ ](PAS|TR|TS) ([\d\:\-]+)}
|
159
|
+
"#{::Regexp.last_match(1)}/#{::Regexp.last_match(2)} #{::Regexp.last_match(3)}".sub(
|
160
|
+
/:\Z/, ""
|
161
|
+
)
|
162
|
+
when %r{ISO/IEC ([\d\:\-]+)}
|
163
|
+
"ISO/IEC #{::Regexp.last_match(1)}".sub(/:\Z/, "")
|
164
|
+
when %r{ISO/IEC/IEEE ([\d\:\-]+)}
|
165
|
+
"ISO/IEC/IEEE #{::Regexp.last_match(1)}".sub(/:\Z/, "")
|
165
166
|
|
166
167
|
# ISO 140/4
|
167
|
-
when
|
168
|
-
"ISO #{
|
168
|
+
when %r{ISO (\d+)/(\d+)}
|
169
|
+
"ISO #{::Regexp.last_match(1)}-#{::Regexp.last_match(2)}"
|
169
170
|
when /Norme ISO (\d+)-(\d+)/
|
170
|
-
"ISO #{
|
171
|
-
when /
|
172
|
-
"ISO/IEC Guide #{
|
171
|
+
"ISO #{::Regexp.last_match(1)}:#{::Regexp.last_match(2)}"
|
172
|
+
when %r{ISO/IEC Guide ([\d\:\-]+)}i
|
173
|
+
"ISO/IEC Guide #{::Regexp.last_match(1)}".sub(/:\Z/, "")
|
173
174
|
when /(ISO|IEC) Guide ([\d\:\-]+)/i
|
174
|
-
"#{
|
175
|
+
"#{::Regexp.last_match(1)} Guide #{::Regexp.last_match(2)}".sub(/:\Z/,
|
176
|
+
"")
|
175
177
|
|
176
178
|
# ITU-T Recommendation F.791 (11/2015)
|
177
|
-
when
|
178
|
-
"ITU-T Recommendation #{
|
179
|
+
when %r{ITU-T Recommendation (\w.\d+) \((\d+/\d+)\)}i
|
180
|
+
"ITU-T Recommendation #{::Regexp.last_match(1)} (#{::Regexp.last_match(2)})"
|
179
181
|
|
180
182
|
# ITU-T Recommendation F.791:2015
|
181
183
|
when /ITU-T Recommendation (\w.\d+):(\d+)/i
|
182
|
-
"ITU-T Recommendation #{
|
184
|
+
"ITU-T Recommendation #{::Regexp.last_match(1)} (#{::Regexp.last_match(2)})"
|
183
185
|
|
184
186
|
when /ITU-T Recommendation (\w\.\d+)/i
|
185
|
-
"ITU-T Recommendation #{
|
187
|
+
"ITU-T Recommendation #{::Regexp.last_match(1)}"
|
186
188
|
|
187
189
|
# ITU-R Recommendation 592 MOD
|
188
190
|
when /ITU-R Recommendation (\d+)/i
|
189
|
-
"ITU-R Recommendation #{
|
191
|
+
"ITU-R Recommendation #{::Regexp.last_match(1)}"
|
190
192
|
# ISO 669: 2000 3.1.16
|
191
193
|
when /ISO ([\d\-]+:\s?\d{4})/
|
192
|
-
"ISO #{
|
194
|
+
"ISO #{::Regexp.last_match(1)}".sub(/:\Z/, "")
|
193
195
|
when /ISO ([\d\:\-]+)/
|
194
|
-
"ISO #{
|
196
|
+
"ISO #{::Regexp.last_match(1)}".sub(/:\Z/, "")
|
195
197
|
when /IEC ([\d\:\-]+)/
|
196
|
-
"IEC #{
|
198
|
+
"IEC #{::Regexp.last_match(1)}".sub(/:\Z/, "")
|
197
199
|
when /definition (\d\.[\d\.]+) of ([\d\-]*)/,
|
198
200
|
/définition (\d\.[\d\.]+) de la ([\d\-]*)/
|
199
|
-
"IEC #{
|
201
|
+
"IEC #{::Regexp.last_match(2)}".sub(/:\Z/, "")
|
200
202
|
|
201
203
|
when /IEV (\d{2,3}-\d{2,3}-\d{2,3})/, /(\d{2,3}-\d{2,3}-\d{2,3})/
|
202
204
|
"IEV"
|
203
205
|
when /IEV part\s+(\d+)/, /partie\s+(\d+)\s+de l'IEV/
|
204
|
-
"IEC 60050-#{
|
206
|
+
"IEC 60050-#{::Regexp.last_match(1)}"
|
205
207
|
|
206
208
|
when /International Telecommunication Union (ITU) Constitution/,
|
207
209
|
/Constitution de l’Union internationale des télécommunications (UIT)/
|
@@ -213,8 +215,6 @@ module IEV
|
|
213
215
|
end
|
214
216
|
|
215
217
|
def extract_source_clause(str)
|
216
|
-
# rubocop:todo Layout/LineLength
|
217
|
-
|
218
218
|
# Strip out the modifications
|
219
219
|
str = str.sub(/[,\ ]*modif.+\s[-–].*\Z/, "")
|
220
220
|
|
@@ -278,8 +278,8 @@ module IEV
|
|
278
278
|
|
279
279
|
# "ISO/IEC/IEEE 24765:2010, <i>Systems and software engineering – Vocabulary</i>, 3.234 (2)
|
280
280
|
[/, ([\d\.\w]+ \(\d+\))/, "1"],
|
281
|
-
].map do |regex,
|
282
|
-
# TODO Rubocop complains about unused rule -- need to make sure
|
281
|
+
].map do |regex, _rule|
|
282
|
+
# TODO: Rubocop complains about unused rule -- need to make sure
|
283
283
|
# that no one forgot about something.
|
284
284
|
res = []
|
285
285
|
# puts "str is '#{str}'"
|
@@ -287,7 +287,7 @@ module IEV
|
|
287
287
|
str.scan(regex).each do |result|
|
288
288
|
# puts "result is #{result.first}"
|
289
289
|
res << {
|
290
|
-
index:
|
290
|
+
index: $LAST_MATCH_INFO.offset(0)[0],
|
291
291
|
clause: result.first.strip,
|
292
292
|
}
|
293
293
|
end
|
@@ -298,28 +298,26 @@ module IEV
|
|
298
298
|
# pp results
|
299
299
|
|
300
300
|
results.dig(0, :clause)
|
301
|
-
|
302
|
-
# rubocop:enable Layout/LineLength
|
303
301
|
end
|
304
302
|
|
305
303
|
def extract_source_relationship(str)
|
306
304
|
type = case str
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
305
|
+
when /≠/
|
306
|
+
:not_equal
|
307
|
+
when /≈/
|
308
|
+
:similar
|
309
|
+
when /^([Ss]ee)|([Vv]oir)/
|
310
|
+
:related
|
311
|
+
when /MOD/, /ИЗМ/
|
312
|
+
:modified
|
313
|
+
when /modified/, /modifié/
|
314
|
+
:modified
|
315
|
+
when /^(from|d'après)/,
|
318
316
|
/^(definition (.+) of)|(définition (.+) de la)/
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
317
|
+
:identical
|
318
|
+
else
|
319
|
+
:identical
|
320
|
+
end
|
323
321
|
|
324
322
|
case str
|
325
323
|
when /^MOD ([\d\-])/
|
@@ -329,8 +327,8 @@ module IEV
|
|
329
327
|
when /(modified|modifié|modifiée|modifiés|MOD)\s*[–-]?\s+(.+)\Z/
|
330
328
|
{
|
331
329
|
"type" => type.to_s,
|
332
|
-
"modification" =>
|
333
|
-
parse_anchor_tag(
|
330
|
+
"modification" => Iev::Converter.mathml_to_asciimath(
|
331
|
+
parse_anchor_tag(::Regexp.last_match(2), @term_domain),
|
334
332
|
).strip,
|
335
333
|
}
|
336
334
|
else
|
@@ -3,22 +3,21 @@
|
|
3
3
|
# (c) Copyright 2020 Ribose Inc.
|
4
4
|
#
|
5
5
|
|
6
|
-
|
6
|
+
require "English"
|
7
|
+
module Iev
|
7
8
|
# Parses information from the spreadsheet's REPLACES column.
|
8
9
|
#
|
9
10
|
# @example
|
10
11
|
# SupersessionParser.new(cell_data_string).supersessions
|
11
12
|
class SupersessionParser
|
12
|
-
include
|
13
|
+
include Cli::Ui
|
13
14
|
using DataConversions
|
14
15
|
|
15
|
-
attr_reader :raw_str, :src_str
|
16
|
-
|
17
|
-
attr_reader :supersessions
|
16
|
+
attr_reader :raw_str, :src_str, :supersessions
|
18
17
|
|
19
18
|
# Regular expression which describes IEV relation, for example
|
20
19
|
# +881-01-23:1983-01+ or +845-03-55:1987+.
|
21
|
-
IEV_SUPERSESSION_RX =
|
20
|
+
IEV_SUPERSESSION_RX = /
|
22
21
|
\A
|
23
22
|
(?:IEV\s+)? # some are prefixed with IEV, it is unnecessary though
|
24
23
|
(?<ref>\d{3}-\d{2}-\d{2})
|
@@ -27,7 +26,7 @@ module IEV
|
|
27
26
|
\s* # some have whitespaces around the separator
|
28
27
|
(?<version>[-0-9]+)
|
29
28
|
\Z
|
30
|
-
|
29
|
+
/x
|
31
30
|
|
32
31
|
def initialize(source_str)
|
33
32
|
@raw_str = source_str.dup.freeze
|
@@ -41,7 +40,7 @@ module IEV
|
|
41
40
|
return if empty_source?
|
42
41
|
|
43
42
|
if IEV_SUPERSESSION_RX =~ src_str
|
44
|
-
[relation_from_match(
|
43
|
+
[relation_from_match($LAST_MATCH_INFO)]
|
45
44
|
else
|
46
45
|
warn "Incorrect supersession: '#{src_str}'"
|
47
46
|
nil
|
@@ -3,7 +3,8 @@
|
|
3
3
|
# (c) Copyright 2020 Ribose Inc.
|
4
4
|
#
|
5
5
|
|
6
|
-
|
6
|
+
require "English"
|
7
|
+
module Iev
|
7
8
|
# Parses information from the spreadsheet's TERMATTRIBUTE column and alike.
|
8
9
|
#
|
9
10
|
# @example
|
@@ -12,13 +13,11 @@ module IEV
|
|
12
13
|
# parser.plurality # returns grammatical plurality
|
13
14
|
# parser.part_of_speech # returns part of speech
|
14
15
|
class TermAttrsParser
|
15
|
-
include
|
16
|
+
include Cli::Ui
|
16
17
|
using DataConversions
|
17
18
|
|
18
|
-
attr_reader :raw_str, :src_str
|
19
|
-
|
20
|
-
attr_reader :gender, :geographical_area, :part_of_speech, :plurality,
|
21
|
-
:prefix, :usage_info
|
19
|
+
attr_reader :raw_str, :src_str, :gender, :geographical_area,
|
20
|
+
:part_of_speech, :plurality, :prefix, :usage_info
|
22
21
|
|
23
22
|
PARTS_OF_SPEECH = {
|
24
23
|
"adj" => "adj",
|
@@ -57,12 +56,12 @@ module IEV
|
|
57
56
|
extract_usage_info(curr_str)
|
58
57
|
extract_prefix(curr_str)
|
59
58
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
59
|
+
return unless /\p{Word}/.match?(curr_str)
|
60
|
+
|
61
|
+
debug(
|
62
|
+
:term_attributes,
|
63
|
+
"Term attributes could not be parsed completely: '#{src_str}'",
|
64
|
+
)
|
66
65
|
end
|
67
66
|
|
68
67
|
def extract_gender(str)
|
@@ -78,12 +77,12 @@ module IEV
|
|
78
77
|
if remove_from_string(str, plural_rx)
|
79
78
|
@plurality = "plural"
|
80
79
|
elsif !gender.nil?
|
81
|
-
# TODO Really needed?
|
80
|
+
# TODO: Really needed?
|
82
81
|
@plurality = "singular"
|
83
82
|
end
|
84
83
|
end
|
85
84
|
|
86
|
-
# TODO this is likely buggy
|
85
|
+
# TODO: this is likely buggy
|
87
86
|
def extract_geographical_area(str)
|
88
87
|
ga_rx = /\b[A-Z]{2}$/
|
89
88
|
|
@@ -91,25 +90,25 @@ module IEV
|
|
91
90
|
end
|
92
91
|
|
93
92
|
def extract_part_of_speech(str)
|
94
|
-
pos_rx =
|
93
|
+
pos_rx = /
|
95
94
|
\b
|
96
95
|
#{Regexp.union(PARTS_OF_SPEECH.keys)}
|
97
96
|
\b
|
98
|
-
|
97
|
+
/x
|
99
98
|
|
100
99
|
removed = remove_from_string(str, pos_rx)
|
101
100
|
@part_of_speech = PARTS_OF_SPEECH[removed] || removed
|
102
101
|
end
|
103
102
|
|
104
103
|
def extract_usage_info(str)
|
105
|
-
info_rx =
|
104
|
+
info_rx = /
|
106
105
|
# regular ASCII less and greater than signs
|
107
106
|
< (?<inner>.*?) >
|
108
107
|
|
|
109
108
|
# < and >, i.e. full-width less and greater than signs
|
110
109
|
# which are used instead of ASCII signs in some CJK terms
|
111
110
|
\uFF1C (?<inner>.*?) \uFF1E
|
112
|
-
|
111
|
+
/x
|
113
112
|
|
114
113
|
remove_from_string(str, info_rx) do |md|
|
115
114
|
@usage_info = md[:inner].strip
|
@@ -117,11 +116,11 @@ module IEV
|
|
117
116
|
end
|
118
117
|
|
119
118
|
def extract_prefix(str)
|
120
|
-
prefix_rx =
|
119
|
+
prefix_rx = /
|
121
120
|
\b
|
122
121
|
#{Regexp.union(PREFIX_KEYWORDS)}
|
123
122
|
\b
|
124
|
-
|
123
|
+
/x
|
125
124
|
|
126
125
|
@prefix = true if remove_from_string(str, prefix_rx)
|
127
126
|
end
|
@@ -133,10 +132,10 @@ module IEV
|
|
133
132
|
def remove_from_string(string, regexp)
|
134
133
|
string.sub!(regexp, "")
|
135
134
|
|
136
|
-
if
|
137
|
-
yield
|
135
|
+
if $LAST_MATCH_INFO && block_given?
|
136
|
+
yield $LAST_MATCH_INFO
|
138
137
|
else
|
139
|
-
|
138
|
+
::Regexp.last_match(0) # removed substring or nil
|
140
139
|
end
|
141
140
|
end
|
142
141
|
end
|
data/lib/iev/term_builder.rb
CHANGED
@@ -3,11 +3,9 @@
|
|
3
3
|
# (c) Copyright 2020 Ribose Inc.
|
4
4
|
#
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
module IEV
|
6
|
+
module Iev
|
9
7
|
class TermBuilder
|
10
|
-
include
|
8
|
+
include Cli::Ui
|
11
9
|
include Utilities
|
12
10
|
using DataConversions
|
13
11
|
|
@@ -121,7 +119,7 @@ module IEV
|
|
121
119
|
Note\s*\d+\sto\sthe\sentry: |
|
122
120
|
Note\sto\sentry\s*\d+: |
|
123
121
|
Note\s*\d+?\sà\sl['’]article: |
|
124
|
-
<NOTE
|
122
|
+
<NOTE/?>?\s*\d?\s+.*?– |
|
125
123
|
NOTE(?:\s+-)? |
|
126
124
|
Note\s+\d+\s– |
|
127
125
|
Note \d+\s
|
@@ -137,9 +135,9 @@ module IEV
|
|
137
135
|
next_part_arr = definition_arr
|
138
136
|
remaining_str = find_value_for("DEFINITION")
|
139
137
|
|
140
|
-
while md = remaining_str&.match(slicer_rx)
|
138
|
+
while (md = remaining_str&.match(slicer_rx))
|
141
139
|
next_part = md.pre_match
|
142
|
-
next_part.sub!(/^\[:Ex(a|e)mple\]/,
|
140
|
+
next_part.sub!(/^\[:Ex(a|e)mple\]/, 'Ex\\1mple')
|
143
141
|
next_part_arr.push(next_part)
|
144
142
|
next_part_arr = md[:example] ? @examples : @notes
|
145
143
|
# 112-03-17
|
@@ -157,10 +155,10 @@ module IEV
|
|
157
155
|
# the `Example` with `[:Example]` and revert it in the next iteration
|
158
156
|
# so it will not be caught by the regex.
|
159
157
|
remaining_str = md.post_match
|
160
|
-
remaining_str.sub!(/^Ex(a|e)mple/,
|
158
|
+
remaining_str.sub!(/^Ex(a|e)mple/, '[:Ex\\1mple]') if md[:note]
|
161
159
|
end
|
162
160
|
|
163
|
-
remaining_str&.sub!(/^\[:Ex(a|e)mple\]/,
|
161
|
+
remaining_str&.sub!(/^\[:Ex(a|e)mple\]/, 'Ex\\1mple')
|
164
162
|
next_part_arr.push(remaining_str)
|
165
163
|
@definition = definition_arr.first
|
166
164
|
@definition = nil if @definition&.empty?
|
@@ -208,16 +206,16 @@ module IEV
|
|
208
206
|
end
|
209
207
|
|
210
208
|
def extract_definition_value
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
)
|
215
|
-
|
209
|
+
return unless @definition
|
210
|
+
|
211
|
+
Iev::Converter.mathml_to_asciimath(
|
212
|
+
replace_newlines(parse_anchor_tag(@definition, term_domain)),
|
213
|
+
).strip
|
216
214
|
end
|
217
215
|
|
218
216
|
def extract_examples
|
219
217
|
@examples.map do |str|
|
220
|
-
|
218
|
+
Iev::Converter.mathml_to_asciimath(
|
221
219
|
replace_newlines(parse_anchor_tag(str, term_domain)),
|
222
220
|
).strip
|
223
221
|
end
|
@@ -225,7 +223,7 @@ module IEV
|
|
225
223
|
|
226
224
|
def extract_notes
|
227
225
|
@notes.map do |str|
|
228
|
-
|
226
|
+
Iev::Converter.mathml_to_asciimath(
|
229
227
|
replace_newlines(parse_anchor_tag(str, term_domain)),
|
230
228
|
).strip
|
231
229
|
end
|
@@ -234,7 +232,6 @@ module IEV
|
|
234
232
|
def extract_entry_status
|
235
233
|
case find_value_for("STATUS").downcase
|
236
234
|
when "standard" then "valid"
|
237
|
-
else nil
|
238
235
|
end
|
239
236
|
end
|
240
237
|
|
@@ -275,7 +272,7 @@ module IEV
|
|
275
272
|
private
|
276
273
|
|
277
274
|
def build_expression_designation(raw_term, attribute_data:, status:)
|
278
|
-
term =
|
275
|
+
term = Iev::Converter.mathml_to_asciimath(
|
279
276
|
parse_anchor_tag(raw_term, term_domain),
|
280
277
|
)
|
281
278
|
term_attributes = TermAttrsParser.new(attribute_data.to_s)
|
@@ -299,7 +296,7 @@ module IEV
|
|
299
296
|
end
|
300
297
|
|
301
298
|
def build_symbol_designation(raw_term)
|
302
|
-
term =
|
299
|
+
term = Iev::Converter.mathml_to_asciimath(
|
303
300
|
parse_anchor_tag(raw_term, term_domain),
|
304
301
|
)
|
305
302
|
|
data/lib/iev/utilities.rb
CHANGED
@@ -1,51 +1,51 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
module
|
3
|
+
module Iev
|
4
4
|
module Utilities
|
5
5
|
SIMG_PATH_REGEX = "<simg .*\\/\\$file\\/([\\d\\-\\w\.]+)>"
|
6
6
|
FIGURE_ONE_REGEX =
|
7
|
-
|
8
|
-
FIGURE_TWO_REGEX = "#{FIGURE_ONE_REGEX}\\s*#{FIGURE_ONE_REGEX}"
|
7
|
+
'<p><b>\\s*Figure\\s+(\\d)\\s+[–-]\\s+(.+)\\s*<\\/b>(<\\/p>)?'
|
8
|
+
FIGURE_TWO_REGEX = "#{FIGURE_ONE_REGEX}\\s*#{FIGURE_ONE_REGEX}".freeze
|
9
9
|
IMAGE_PATH_PREFIX = "image::/assets/images/parts"
|
10
10
|
|
11
11
|
def parse_anchor_tag(text, term_domain)
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
12
|
+
return unless text
|
13
|
+
|
14
|
+
# Convert IEV term references
|
15
|
+
# Convert href links
|
16
|
+
# Need to take care of this pattern:
|
17
|
+
# `inverse de la <a href="IEV103-06-01">période<a>`
|
18
|
+
text.gsub(
|
19
|
+
%r{<a href="?(IEV)\s*(\d\d\d-\d\d-\d\d\d?)"?>(.*?)</?a>},
|
20
|
+
'{{\3, \1:\2}}',
|
21
|
+
).gsub(
|
22
|
+
%r{<a href="?\s*(\d\d\d-\d\d-\d\d\d?)"?>(.*?)</?a>},
|
23
|
+
'{{\3, IEV:\2}}',
|
24
|
+
).gsub(
|
25
|
+
# To handle <a> tags without ending tag like
|
26
|
+
# `Voir <a href=IEV103-05-21>IEV 103-05-21`
|
27
|
+
# for concept '702-03-11' in `fr`
|
28
|
+
/<a href="?(IEV)?\s*(\d\d\d-\d\d-\d\d\d?)"?>(.*?)$/,
|
29
|
+
'{{\3, IEV:\2}}',
|
30
|
+
).gsub(
|
31
|
+
%r{<a href="?([^<>]*?)"?>(.*?)</a>},
|
32
|
+
'\1[\2]',
|
33
|
+
).gsub(
|
34
|
+
Regexp.new([SIMG_PATH_REGEX, '\\s*', FIGURE_TWO_REGEX].join),
|
35
|
+
"#{IMAGE_PATH_PREFIX}/#{term_domain}/\\1[Figure \\2 - \\3; \\6]",
|
36
|
+
).gsub(
|
37
|
+
Regexp.new([SIMG_PATH_REGEX, '\\s*', FIGURE_ONE_REGEX].join),
|
38
|
+
"#{IMAGE_PATH_PREFIX}/#{term_domain}/\\1[Figure \\2 - \\3]",
|
39
|
+
).gsub(
|
40
|
+
/<img\s+([^<>]+?)\s*>/,
|
41
|
+
"#{IMAGE_PATH_PREFIX}/#{term_domain}/\\1[]",
|
42
|
+
).gsub(
|
43
|
+
/<br>/,
|
44
|
+
"\n",
|
45
|
+
).gsub(
|
46
|
+
%r{<b>(.*?)</b>},
|
47
|
+
'*\\1*',
|
48
|
+
)
|
49
49
|
end
|
50
50
|
|
51
51
|
def replace_newlines(input)
|
data/lib/iev/version.rb
CHANGED
data/lib/iev.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "iev/version"
|
2
4
|
require "iev/db"
|
3
5
|
require "open-uri"
|
@@ -16,14 +18,9 @@ require "yaml"
|
|
16
18
|
require "zeitwerk"
|
17
19
|
|
18
20
|
loader = Zeitwerk::Loader.for_gem
|
19
|
-
loader.inflector.inflect(
|
20
|
-
"cli" => "CLI",
|
21
|
-
"iev" => "IEV",
|
22
|
-
"ui" => "UI",
|
23
|
-
)
|
24
21
|
loader.setup
|
25
22
|
|
26
|
-
module
|
23
|
+
module Iev
|
27
24
|
#
|
28
25
|
# Scrape Electropedia for term.
|
29
26
|
#
|
@@ -42,8 +39,8 @@ module IEV
|
|
42
39
|
"following-sibling::td[2]"
|
43
40
|
a = doc&.at(xpath)&.children&.to_xml
|
44
41
|
a&.sub(%r{<br/>.*$}, "")
|
45
|
-
&.sub(
|
46
|
-
&.gsub(
|
42
|
+
&.sub(/, <.*$/, "")
|
43
|
+
&.gsub(/<[^<>]*>/, "")&.strip
|
47
44
|
end
|
48
45
|
end
|
49
46
|
|