iev 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +0 -4
- data/.github/workflows/release.yml +1 -4
- data/.rubocop.yml +1 -1
- data/.rubocop_todo.yml +81 -0
- data/Gemfile +11 -3
- data/README.adoc +276 -21
- data/Rakefile +2 -0
- data/bin/console +1 -0
- data/exe/iev +7 -0
- data/iev.gemspec +16 -20
- data/lib/iev/cli/command.rb +38 -38
- data/lib/iev/cli/command_helper.rb +13 -11
- data/lib/iev/cli/ui.rb +5 -5
- data/lib/iev/cli.rb +5 -5
- data/lib/iev/converter/mathml_to_asciimath.rb +75 -77
- data/lib/iev/converter.rb +2 -2
- data/lib/iev/data_conversions.rb +5 -5
- data/lib/iev/db.rb +5 -3
- data/lib/iev/db_cache.rb +7 -5
- data/lib/iev/db_writer.rb +3 -2
- data/lib/iev/iso_639_code.rb +8 -12
- data/lib/iev/profiler.rb +7 -7
- data/lib/iev/relaton_db.rb +8 -12
- data/lib/iev/source_parser.rb +60 -62
- data/lib/iev/supersession_parser.rb +7 -8
- data/lib/iev/term_attrs_parser.rb +22 -23
- data/lib/iev/term_builder.rb +16 -19
- data/lib/iev/utilities.rb +40 -40
- data/lib/iev/version.rb +4 -2
- data/lib/iev.rb +5 -8
- metadata +29 -98
- data/exe/iev-glossarist +0 -21
data/lib/iev/source_parser.rb
CHANGED
@@ -5,13 +5,14 @@
|
|
5
5
|
|
6
6
|
# rubocop:todo Style/RedundantRegexpEscape
|
7
7
|
|
8
|
-
|
8
|
+
require "English"
|
9
|
+
module Iev
|
9
10
|
# Parses information from the spreadsheet's SOURCE column.
|
10
11
|
#
|
11
12
|
# @example
|
12
13
|
# SourceParser.new(cell_data_string).parsed_sources
|
13
14
|
class SourceParser
|
14
|
-
include
|
15
|
+
include Cli::Ui
|
15
16
|
include Utilities
|
16
17
|
using DataConversions
|
17
18
|
|
@@ -32,7 +33,7 @@ module IEV
|
|
32
33
|
end
|
33
34
|
|
34
35
|
def split_source_field(source)
|
35
|
-
# TODO Calling String#gsub with a single hash argument would be probably
|
36
|
+
# TODO: Calling String#gsub with a single hash argument would be probably
|
36
37
|
# better than calling that method multiple times. But change is
|
37
38
|
# not necessarily that easy to do.
|
38
39
|
|
@@ -63,7 +64,9 @@ module IEV
|
|
63
64
|
source = source.gsub(/,\s+ITU/, ";; ITU")
|
64
65
|
|
65
66
|
# 705-02-01, 702-02-07
|
66
|
-
source = source.gsub(
|
67
|
+
source = source.gsub(
|
68
|
+
/(\d{2,3}-\d{2,3}-\d{2,3}),\s*(\d{2,3}-\d{2,3}-\d{2,3})/, '\1;; \2'
|
69
|
+
)
|
67
70
|
|
68
71
|
source.split(";;").map(&:strip)
|
69
72
|
end
|
@@ -79,7 +82,7 @@ module IEV
|
|
79
82
|
"clause" => clause,
|
80
83
|
"link" => obtain_source_link(source_ref),
|
81
84
|
"relationship" => relation_type,
|
82
|
-
"original" =>
|
85
|
+
"original" => Iev::Converter.mathml_to_asciimath(
|
83
86
|
parse_anchor_tag(raw_ref, @term_domain),
|
84
87
|
),
|
85
88
|
}.compact
|
@@ -88,8 +91,6 @@ module IEV
|
|
88
91
|
end
|
89
92
|
|
90
93
|
def normalize_ref_string(str)
|
91
|
-
# rubocop:todo Layout/LineLength
|
92
|
-
|
93
94
|
# définition 3.60 de la 62127-1
|
94
95
|
# definition 3.60 of 62127-1
|
95
96
|
# définition 3.60 de la 62127-1
|
@@ -103,7 +104,7 @@ module IEV
|
|
103
104
|
str
|
104
105
|
.gsub(/CEI/, "IEC")
|
105
106
|
.gsub(/Guide IEC/, "IEC Guide")
|
106
|
-
.gsub(
|
107
|
+
.gsub(%r{Guide ISO/IEC}, "ISO/IEC Guide")
|
107
108
|
.gsub(/VEI/, "IEV")
|
108
109
|
.gsub(/UIT/, "ITU")
|
109
110
|
.gsub(/IUT-R/, "ITU-R")
|
@@ -120,8 +121,6 @@ module IEV
|
|
120
121
|
.sub(/(\d{3})\ (\d{2})\ (\d{2})/, '\1-\2-\3') # for 221 04 03
|
121
122
|
|
122
123
|
# .sub(/\A(from|d'après|voir la|see|See|voir|Voir)\s+/, "")
|
123
|
-
|
124
|
-
# rubocop:enable Layout/LineLength
|
125
124
|
end
|
126
125
|
|
127
126
|
def extract_source_ref(str)
|
@@ -142,66 +141,69 @@ module IEV
|
|
142
141
|
"JCGM VIM"
|
143
142
|
# IEC 60050-121, 151-12-05
|
144
143
|
when /IEC 60050-(\d+), (\d{2,3}-\d{2,3}-\d{2,3})/
|
145
|
-
"IEC 60050-#{
|
144
|
+
"IEC 60050-#{::Regexp.last_match(1)}"
|
146
145
|
when /IEC 60050-(\d+):(\d+), (\d{2,3}-\d{2,3}-\d{2,3})/
|
147
|
-
"IEC 60050-#{
|
146
|
+
"IEC 60050-#{::Regexp.last_match(1)}:#{::Regexp.last_match(2)}"
|
148
147
|
when /(AIEA|IAEA) (\d+)/
|
149
|
-
"IAEA #{
|
148
|
+
"IAEA #{::Regexp.last_match(2)}"
|
150
149
|
when /IEC\sIEEE ([\d\:\-]+)/
|
151
|
-
"IEC/IEEE #{
|
150
|
+
"IEC/IEEE #{::Regexp.last_match(1)}".sub(/:\Z/, "")
|
152
151
|
when /CISPR ([\d\:\-]+)/
|
153
|
-
"IEC CISPR #{
|
152
|
+
"IEC CISPR #{::Regexp.last_match(1)}"
|
154
153
|
when /RR (\d+)/
|
155
154
|
"ITU-R RR"
|
156
155
|
# IEC 50(845)
|
157
156
|
when /IEC (\d+)\((\d+)\)/
|
158
|
-
"IEC 600#{
|
159
|
-
when
|
160
|
-
"#{
|
161
|
-
|
162
|
-
|
163
|
-
when /
|
164
|
-
"ISO/IEC
|
157
|
+
"IEC 600#{::Regexp.last_match(1)}-#{::Regexp.last_match(1)}"
|
158
|
+
when %r{(ISO|IEC)[/\ ](PAS|TR|TS) ([\d\:\-]+)}
|
159
|
+
"#{::Regexp.last_match(1)}/#{::Regexp.last_match(2)} #{::Regexp.last_match(3)}".sub(
|
160
|
+
/:\Z/, ""
|
161
|
+
)
|
162
|
+
when %r{ISO/IEC ([\d\:\-]+)}
|
163
|
+
"ISO/IEC #{::Regexp.last_match(1)}".sub(/:\Z/, "")
|
164
|
+
when %r{ISO/IEC/IEEE ([\d\:\-]+)}
|
165
|
+
"ISO/IEC/IEEE #{::Regexp.last_match(1)}".sub(/:\Z/, "")
|
165
166
|
|
166
167
|
# ISO 140/4
|
167
|
-
when
|
168
|
-
"ISO #{
|
168
|
+
when %r{ISO (\d+)/(\d+)}
|
169
|
+
"ISO #{::Regexp.last_match(1)}-#{::Regexp.last_match(2)}"
|
169
170
|
when /Norme ISO (\d+)-(\d+)/
|
170
|
-
"ISO #{
|
171
|
-
when /
|
172
|
-
"ISO/IEC Guide #{
|
171
|
+
"ISO #{::Regexp.last_match(1)}:#{::Regexp.last_match(2)}"
|
172
|
+
when %r{ISO/IEC Guide ([\d\:\-]+)}i
|
173
|
+
"ISO/IEC Guide #{::Regexp.last_match(1)}".sub(/:\Z/, "")
|
173
174
|
when /(ISO|IEC) Guide ([\d\:\-]+)/i
|
174
|
-
"#{
|
175
|
+
"#{::Regexp.last_match(1)} Guide #{::Regexp.last_match(2)}".sub(/:\Z/,
|
176
|
+
"")
|
175
177
|
|
176
178
|
# ITU-T Recommendation F.791 (11/2015)
|
177
|
-
when
|
178
|
-
"ITU-T Recommendation #{
|
179
|
+
when %r{ITU-T Recommendation (\w.\d+) \((\d+/\d+)\)}i
|
180
|
+
"ITU-T Recommendation #{::Regexp.last_match(1)} (#{::Regexp.last_match(2)})"
|
179
181
|
|
180
182
|
# ITU-T Recommendation F.791:2015
|
181
183
|
when /ITU-T Recommendation (\w.\d+):(\d+)/i
|
182
|
-
"ITU-T Recommendation #{
|
184
|
+
"ITU-T Recommendation #{::Regexp.last_match(1)} (#{::Regexp.last_match(2)})"
|
183
185
|
|
184
186
|
when /ITU-T Recommendation (\w\.\d+)/i
|
185
|
-
"ITU-T Recommendation #{
|
187
|
+
"ITU-T Recommendation #{::Regexp.last_match(1)}"
|
186
188
|
|
187
189
|
# ITU-R Recommendation 592 MOD
|
188
190
|
when /ITU-R Recommendation (\d+)/i
|
189
|
-
"ITU-R Recommendation #{
|
191
|
+
"ITU-R Recommendation #{::Regexp.last_match(1)}"
|
190
192
|
# ISO 669: 2000 3.1.16
|
191
193
|
when /ISO ([\d\-]+:\s?\d{4})/
|
192
|
-
"ISO #{
|
194
|
+
"ISO #{::Regexp.last_match(1)}".sub(/:\Z/, "")
|
193
195
|
when /ISO ([\d\:\-]+)/
|
194
|
-
"ISO #{
|
196
|
+
"ISO #{::Regexp.last_match(1)}".sub(/:\Z/, "")
|
195
197
|
when /IEC ([\d\:\-]+)/
|
196
|
-
"IEC #{
|
198
|
+
"IEC #{::Regexp.last_match(1)}".sub(/:\Z/, "")
|
197
199
|
when /definition (\d\.[\d\.]+) of ([\d\-]*)/,
|
198
200
|
/définition (\d\.[\d\.]+) de la ([\d\-]*)/
|
199
|
-
"IEC #{
|
201
|
+
"IEC #{::Regexp.last_match(2)}".sub(/:\Z/, "")
|
200
202
|
|
201
203
|
when /IEV (\d{2,3}-\d{2,3}-\d{2,3})/, /(\d{2,3}-\d{2,3}-\d{2,3})/
|
202
204
|
"IEV"
|
203
205
|
when /IEV part\s+(\d+)/, /partie\s+(\d+)\s+de l'IEV/
|
204
|
-
"IEC 60050-#{
|
206
|
+
"IEC 60050-#{::Regexp.last_match(1)}"
|
205
207
|
|
206
208
|
when /International Telecommunication Union (ITU) Constitution/,
|
207
209
|
/Constitution de l’Union internationale des télécommunications (UIT)/
|
@@ -213,8 +215,6 @@ module IEV
|
|
213
215
|
end
|
214
216
|
|
215
217
|
def extract_source_clause(str)
|
216
|
-
# rubocop:todo Layout/LineLength
|
217
|
-
|
218
218
|
# Strip out the modifications
|
219
219
|
str = str.sub(/[,\ ]*modif.+\s[-–].*\Z/, "")
|
220
220
|
|
@@ -278,8 +278,8 @@ module IEV
|
|
278
278
|
|
279
279
|
# "ISO/IEC/IEEE 24765:2010, <i>Systems and software engineering – Vocabulary</i>, 3.234 (2)
|
280
280
|
[/, ([\d\.\w]+ \(\d+\))/, "1"],
|
281
|
-
].map do |regex,
|
282
|
-
# TODO Rubocop complains about unused rule -- need to make sure
|
281
|
+
].map do |regex, _rule|
|
282
|
+
# TODO: Rubocop complains about unused rule -- need to make sure
|
283
283
|
# that no one forgot about something.
|
284
284
|
res = []
|
285
285
|
# puts "str is '#{str}'"
|
@@ -287,7 +287,7 @@ module IEV
|
|
287
287
|
str.scan(regex).each do |result|
|
288
288
|
# puts "result is #{result.first}"
|
289
289
|
res << {
|
290
|
-
index:
|
290
|
+
index: $LAST_MATCH_INFO.offset(0)[0],
|
291
291
|
clause: result.first.strip,
|
292
292
|
}
|
293
293
|
end
|
@@ -298,28 +298,26 @@ module IEV
|
|
298
298
|
# pp results
|
299
299
|
|
300
300
|
results.dig(0, :clause)
|
301
|
-
|
302
|
-
# rubocop:enable Layout/LineLength
|
303
301
|
end
|
304
302
|
|
305
303
|
def extract_source_relationship(str)
|
306
304
|
type = case str
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
305
|
+
when /≠/
|
306
|
+
:not_equal
|
307
|
+
when /≈/
|
308
|
+
:similar
|
309
|
+
when /^([Ss]ee)|([Vv]oir)/
|
310
|
+
:related
|
311
|
+
when /MOD/, /ИЗМ/
|
312
|
+
:modified
|
313
|
+
when /modified/, /modifié/
|
314
|
+
:modified
|
315
|
+
when /^(from|d'après)/,
|
318
316
|
/^(definition (.+) of)|(définition (.+) de la)/
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
317
|
+
:identical
|
318
|
+
else
|
319
|
+
:identical
|
320
|
+
end
|
323
321
|
|
324
322
|
case str
|
325
323
|
when /^MOD ([\d\-])/
|
@@ -329,8 +327,8 @@ module IEV
|
|
329
327
|
when /(modified|modifié|modifiée|modifiés|MOD)\s*[–-]?\s+(.+)\Z/
|
330
328
|
{
|
331
329
|
"type" => type.to_s,
|
332
|
-
"modification" =>
|
333
|
-
parse_anchor_tag(
|
330
|
+
"modification" => Iev::Converter.mathml_to_asciimath(
|
331
|
+
parse_anchor_tag(::Regexp.last_match(2), @term_domain),
|
334
332
|
).strip,
|
335
333
|
}
|
336
334
|
else
|
data/lib/iev/supersession_parser.rb
CHANGED
@@ -3,22 +3,21 @@
|
|
3
3
|
# (c) Copyright 2020 Ribose Inc.
|
4
4
|
#
|
5
5
|
|
6
|
-
|
6
|
+
require "English"
|
7
|
+
module Iev
|
7
8
|
# Parses information from the spreadsheet's REPLACES column.
|
8
9
|
#
|
9
10
|
# @example
|
10
11
|
# SupersessionParser.new(cell_data_string).supersessions
|
11
12
|
class SupersessionParser
|
12
|
-
include
|
13
|
+
include Cli::Ui
|
13
14
|
using DataConversions
|
14
15
|
|
15
|
-
attr_reader :raw_str, :src_str
|
16
|
-
|
17
|
-
attr_reader :supersessions
|
16
|
+
attr_reader :raw_str, :src_str, :supersessions
|
18
17
|
|
19
18
|
# Regular expression which describes IEV relation, for example
|
20
19
|
# +881-01-23:1983-01+ or +845-03-55:1987+.
|
21
|
-
IEV_SUPERSESSION_RX =
|
20
|
+
IEV_SUPERSESSION_RX = /
|
22
21
|
\A
|
23
22
|
(?:IEV\s+)? # some are prefixed with IEV, it is unnecessary though
|
24
23
|
(?<ref>\d{3}-\d{2}-\d{2})
|
@@ -27,7 +26,7 @@ module IEV
|
|
27
26
|
\s* # some have whitespaces around the separator
|
28
27
|
(?<version>[-0-9]+)
|
29
28
|
\Z
|
30
|
-
|
29
|
+
/x
|
31
30
|
|
32
31
|
def initialize(source_str)
|
33
32
|
@raw_str = source_str.dup.freeze
|
@@ -41,7 +40,7 @@ module IEV
|
|
41
40
|
return if empty_source?
|
42
41
|
|
43
42
|
if IEV_SUPERSESSION_RX =~ src_str
|
44
|
-
[relation_from_match(
|
43
|
+
[relation_from_match($LAST_MATCH_INFO)]
|
45
44
|
else
|
46
45
|
warn "Incorrect supersession: '#{src_str}'"
|
47
46
|
nil
|
data/lib/iev/term_attrs_parser.rb
CHANGED
@@ -3,7 +3,8 @@
|
|
3
3
|
# (c) Copyright 2020 Ribose Inc.
|
4
4
|
#
|
5
5
|
|
6
|
-
|
6
|
+
require "English"
|
7
|
+
module Iev
|
7
8
|
# Parses information from the spreadsheet's TERMATTRIBUTE column and alike.
|
8
9
|
#
|
9
10
|
# @example
|
@@ -12,13 +13,11 @@ module IEV
|
|
12
13
|
# parser.plurality # returns grammatical plurality
|
13
14
|
# parser.part_of_speech # returns part of speech
|
14
15
|
class TermAttrsParser
|
15
|
-
include
|
16
|
+
include Cli::Ui
|
16
17
|
using DataConversions
|
17
18
|
|
18
|
-
attr_reader :raw_str, :src_str
|
19
|
-
|
20
|
-
attr_reader :gender, :geographical_area, :part_of_speech, :plurality,
|
21
|
-
:prefix, :usage_info
|
19
|
+
attr_reader :raw_str, :src_str, :gender, :geographical_area,
|
20
|
+
:part_of_speech, :plurality, :prefix, :usage_info
|
22
21
|
|
23
22
|
PARTS_OF_SPEECH = {
|
24
23
|
"adj" => "adj",
|
@@ -57,12 +56,12 @@ module IEV
|
|
57
56
|
extract_usage_info(curr_str)
|
58
57
|
extract_prefix(curr_str)
|
59
58
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
59
|
+
return unless /\p{Word}/.match?(curr_str)
|
60
|
+
|
61
|
+
debug(
|
62
|
+
:term_attributes,
|
63
|
+
"Term attributes could not be parsed completely: '#{src_str}'",
|
64
|
+
)
|
66
65
|
end
|
67
66
|
|
68
67
|
def extract_gender(str)
|
@@ -78,12 +77,12 @@ module IEV
|
|
78
77
|
if remove_from_string(str, plural_rx)
|
79
78
|
@plurality = "plural"
|
80
79
|
elsif !gender.nil?
|
81
|
-
# TODO Really needed?
|
80
|
+
# TODO: Really needed?
|
82
81
|
@plurality = "singular"
|
83
82
|
end
|
84
83
|
end
|
85
84
|
|
86
|
-
# TODO this is likely buggy
|
85
|
+
# TODO: this is likely buggy
|
87
86
|
def extract_geographical_area(str)
|
88
87
|
ga_rx = /\b[A-Z]{2}$/
|
89
88
|
|
@@ -91,25 +90,25 @@ module IEV
|
|
91
90
|
end
|
92
91
|
|
93
92
|
def extract_part_of_speech(str)
|
94
|
-
pos_rx =
|
93
|
+
pos_rx = /
|
95
94
|
\b
|
96
95
|
#{Regexp.union(PARTS_OF_SPEECH.keys)}
|
97
96
|
\b
|
98
|
-
|
97
|
+
/x
|
99
98
|
|
100
99
|
removed = remove_from_string(str, pos_rx)
|
101
100
|
@part_of_speech = PARTS_OF_SPEECH[removed] || removed
|
102
101
|
end
|
103
102
|
|
104
103
|
def extract_usage_info(str)
|
105
|
-
info_rx =
|
104
|
+
info_rx = /
|
106
105
|
# regular ASCII less and greater than signs
|
107
106
|
< (?<inner>.*?) >
|
108
107
|
|
|
109
108
|
# < and >, i.e. full-width less and greater than signs
|
110
109
|
# which are used instead of ASCII signs in some CJK terms
|
111
110
|
\uFF1C (?<inner>.*?) \uFF1E
|
112
|
-
|
111
|
+
/x
|
113
112
|
|
114
113
|
remove_from_string(str, info_rx) do |md|
|
115
114
|
@usage_info = md[:inner].strip
|
@@ -117,11 +116,11 @@ module IEV
|
|
117
116
|
end
|
118
117
|
|
119
118
|
def extract_prefix(str)
|
120
|
-
prefix_rx =
|
119
|
+
prefix_rx = /
|
121
120
|
\b
|
122
121
|
#{Regexp.union(PREFIX_KEYWORDS)}
|
123
122
|
\b
|
124
|
-
|
123
|
+
/x
|
125
124
|
|
126
125
|
@prefix = true if remove_from_string(str, prefix_rx)
|
127
126
|
end
|
@@ -133,10 +132,10 @@ module IEV
|
|
133
132
|
def remove_from_string(string, regexp)
|
134
133
|
string.sub!(regexp, "")
|
135
134
|
|
136
|
-
if
|
137
|
-
yield
|
135
|
+
if $LAST_MATCH_INFO && block_given?
|
136
|
+
yield $LAST_MATCH_INFO
|
138
137
|
else
|
139
|
-
|
138
|
+
::Regexp.last_match(0) # removed substring or nil
|
140
139
|
end
|
141
140
|
end
|
142
141
|
end
|
data/lib/iev/term_builder.rb
CHANGED
@@ -3,11 +3,9 @@
|
|
3
3
|
# (c) Copyright 2020 Ribose Inc.
|
4
4
|
#
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
module IEV
|
6
|
+
module Iev
|
9
7
|
class TermBuilder
|
10
|
-
include
|
8
|
+
include Cli::Ui
|
11
9
|
include Utilities
|
12
10
|
using DataConversions
|
13
11
|
|
@@ -121,7 +119,7 @@ module IEV
|
|
121
119
|
Note\s*\d+\sto\sthe\sentry: |
|
122
120
|
Note\sto\sentry\s*\d+: |
|
123
121
|
Note\s*\d+?\sà\sl['’]article: |
|
124
|
-
<NOTE
|
122
|
+
<NOTE/?>?\s*\d?\s+.*?– |
|
125
123
|
NOTE(?:\s+-)? |
|
126
124
|
Note\s+\d+\s– |
|
127
125
|
Note \d+\s
|
@@ -137,9 +135,9 @@ module IEV
|
|
137
135
|
next_part_arr = definition_arr
|
138
136
|
remaining_str = find_value_for("DEFINITION")
|
139
137
|
|
140
|
-
while md = remaining_str&.match(slicer_rx)
|
138
|
+
while (md = remaining_str&.match(slicer_rx))
|
141
139
|
next_part = md.pre_match
|
142
|
-
next_part.sub!(/^\[:Ex(a|e)mple\]/,
|
140
|
+
next_part.sub!(/^\[:Ex(a|e)mple\]/, 'Ex\\1mple')
|
143
141
|
next_part_arr.push(next_part)
|
144
142
|
next_part_arr = md[:example] ? @examples : @notes
|
145
143
|
# 112-03-17
|
@@ -157,10 +155,10 @@ module IEV
|
|
157
155
|
# the `Example` with `[:Example]` and revert it in the next iteration
|
158
156
|
# so it will not be caught by the regex.
|
159
157
|
remaining_str = md.post_match
|
160
|
-
remaining_str.sub!(/^Ex(a|e)mple/,
|
158
|
+
remaining_str.sub!(/^Ex(a|e)mple/, '[:Ex\\1mple]') if md[:note]
|
161
159
|
end
|
162
160
|
|
163
|
-
remaining_str&.sub!(/^\[:Ex(a|e)mple\]/,
|
161
|
+
remaining_str&.sub!(/^\[:Ex(a|e)mple\]/, 'Ex\\1mple')
|
164
162
|
next_part_arr.push(remaining_str)
|
165
163
|
@definition = definition_arr.first
|
166
164
|
@definition = nil if @definition&.empty?
|
@@ -208,16 +206,16 @@ module IEV
|
|
208
206
|
end
|
209
207
|
|
210
208
|
def extract_definition_value
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
)
|
215
|
-
|
209
|
+
return unless @definition
|
210
|
+
|
211
|
+
Iev::Converter.mathml_to_asciimath(
|
212
|
+
replace_newlines(parse_anchor_tag(@definition, term_domain)),
|
213
|
+
).strip
|
216
214
|
end
|
217
215
|
|
218
216
|
def extract_examples
|
219
217
|
@examples.map do |str|
|
220
|
-
|
218
|
+
Iev::Converter.mathml_to_asciimath(
|
221
219
|
replace_newlines(parse_anchor_tag(str, term_domain)),
|
222
220
|
).strip
|
223
221
|
end
|
@@ -225,7 +223,7 @@ module IEV
|
|
225
223
|
|
226
224
|
def extract_notes
|
227
225
|
@notes.map do |str|
|
228
|
-
|
226
|
+
Iev::Converter.mathml_to_asciimath(
|
229
227
|
replace_newlines(parse_anchor_tag(str, term_domain)),
|
230
228
|
).strip
|
231
229
|
end
|
@@ -234,7 +232,6 @@ module IEV
|
|
234
232
|
def extract_entry_status
|
235
233
|
case find_value_for("STATUS").downcase
|
236
234
|
when "standard" then "valid"
|
237
|
-
else nil
|
238
235
|
end
|
239
236
|
end
|
240
237
|
|
@@ -275,7 +272,7 @@ module IEV
|
|
275
272
|
private
|
276
273
|
|
277
274
|
def build_expression_designation(raw_term, attribute_data:, status:)
|
278
|
-
term =
|
275
|
+
term = Iev::Converter.mathml_to_asciimath(
|
279
276
|
parse_anchor_tag(raw_term, term_domain),
|
280
277
|
)
|
281
278
|
term_attributes = TermAttrsParser.new(attribute_data.to_s)
|
@@ -299,7 +296,7 @@ module IEV
|
|
299
296
|
end
|
300
297
|
|
301
298
|
def build_symbol_designation(raw_term)
|
302
|
-
term =
|
299
|
+
term = Iev::Converter.mathml_to_asciimath(
|
303
300
|
parse_anchor_tag(raw_term, term_domain),
|
304
301
|
)
|
305
302
|
|
data/lib/iev/utilities.rb
CHANGED
@@ -1,51 +1,51 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
module
|
3
|
+
module Iev
|
4
4
|
module Utilities
|
5
5
|
SIMG_PATH_REGEX = "<simg .*\\/\\$file\\/([\\d\\-\\w\.]+)>"
|
6
6
|
FIGURE_ONE_REGEX =
|
7
|
-
|
8
|
-
FIGURE_TWO_REGEX = "#{FIGURE_ONE_REGEX}\\s*#{FIGURE_ONE_REGEX}"
|
7
|
+
'<p><b>\\s*Figure\\s+(\\d)\\s+[–-]\\s+(.+)\\s*<\\/b>(<\\/p>)?'
|
8
|
+
FIGURE_TWO_REGEX = "#{FIGURE_ONE_REGEX}\\s*#{FIGURE_ONE_REGEX}".freeze
|
9
9
|
IMAGE_PATH_PREFIX = "image::/assets/images/parts"
|
10
10
|
|
11
11
|
def parse_anchor_tag(text, term_domain)
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
12
|
+
return unless text
|
13
|
+
|
14
|
+
# Convert IEV term references
|
15
|
+
# Convert href links
|
16
|
+
# Need to take care of this pattern:
|
17
|
+
# `inverse de la <a href="IEV103-06-01">période<a>`
|
18
|
+
text.gsub(
|
19
|
+
%r{<a href="?(IEV)\s*(\d\d\d-\d\d-\d\d\d?)"?>(.*?)</?a>},
|
20
|
+
'{{\3, \1:\2}}',
|
21
|
+
).gsub(
|
22
|
+
%r{<a href="?\s*(\d\d\d-\d\d-\d\d\d?)"?>(.*?)</?a>},
|
23
|
+
'{{\3, IEV:\2}}',
|
24
|
+
).gsub(
|
25
|
+
# To handle <a> tags without ending tag like
|
26
|
+
# `Voir <a href=IEV103-05-21>IEV 103-05-21`
|
27
|
+
# for concept '702-03-11' in `fr`
|
28
|
+
/<a href="?(IEV)?\s*(\d\d\d-\d\d-\d\d\d?)"?>(.*?)$/,
|
29
|
+
'{{\3, IEV:\2}}',
|
30
|
+
).gsub(
|
31
|
+
%r{<a href="?([^<>]*?)"?>(.*?)</a>},
|
32
|
+
'\1[\2]',
|
33
|
+
).gsub(
|
34
|
+
Regexp.new([SIMG_PATH_REGEX, '\\s*', FIGURE_TWO_REGEX].join),
|
35
|
+
"#{IMAGE_PATH_PREFIX}/#{term_domain}/\\1[Figure \\2 - \\3; \\6]",
|
36
|
+
).gsub(
|
37
|
+
Regexp.new([SIMG_PATH_REGEX, '\\s*', FIGURE_ONE_REGEX].join),
|
38
|
+
"#{IMAGE_PATH_PREFIX}/#{term_domain}/\\1[Figure \\2 - \\3]",
|
39
|
+
).gsub(
|
40
|
+
/<img\s+([^<>]+?)\s*>/,
|
41
|
+
"#{IMAGE_PATH_PREFIX}/#{term_domain}/\\1[]",
|
42
|
+
).gsub(
|
43
|
+
/<br>/,
|
44
|
+
"\n",
|
45
|
+
).gsub(
|
46
|
+
%r{<b>(.*?)</b>},
|
47
|
+
'*\\1*',
|
48
|
+
)
|
49
49
|
end
|
50
50
|
|
51
51
|
def replace_newlines(input)
|
data/lib/iev/version.rb
CHANGED
data/lib/iev.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "iev/version"
|
2
4
|
require "iev/db"
|
3
5
|
require "open-uri"
|
@@ -16,14 +18,9 @@ require "yaml"
|
|
16
18
|
require "zeitwerk"
|
17
19
|
|
18
20
|
loader = Zeitwerk::Loader.for_gem
|
19
|
-
loader.inflector.inflect(
|
20
|
-
"cli" => "CLI",
|
21
|
-
"iev" => "IEV",
|
22
|
-
"ui" => "UI",
|
23
|
-
)
|
24
21
|
loader.setup
|
25
22
|
|
26
|
-
module
|
23
|
+
module Iev
|
27
24
|
#
|
28
25
|
# Scrape Electropedia for term.
|
29
26
|
#
|
@@ -42,8 +39,8 @@ module IEV
|
|
42
39
|
"following-sibling::td[2]"
|
43
40
|
a = doc&.at(xpath)&.children&.to_xml
|
44
41
|
a&.sub(%r{<br/>.*$}, "")
|
45
|
-
&.sub(
|
46
|
-
&.gsub(
|
42
|
+
&.sub(/, <.*$/, "")
|
43
|
+
&.gsub(/<[^<>]*>/, "")&.strip
|
47
44
|
end
|
48
45
|
end
|
49
46
|
|