tc211-termbase 0.2.1 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +13 -0
- data/.github/workflows/release.yml +23 -0
- data/.gitignore +3 -1
- data/exe/tc211-termbase-xlsx2yaml +2 -5
- data/lib/tc211/termbase/concept.rb +90 -39
- data/lib/tc211/termbase/concept_collection.rb +29 -21
- data/lib/tc211/termbase/glossarist/concept.rb +33 -0
- data/lib/tc211/termbase/glossarist/managed_concept.rb +34 -0
- data/lib/tc211/termbase/information_sheet.rb +11 -15
- data/lib/tc211/termbase/metadata_section.rb +108 -112
- data/lib/tc211/termbase/sheet_section.rb +18 -19
- data/lib/tc211/termbase/term.rb +353 -230
- data/lib/tc211/termbase/term_workbook.rb +35 -36
- data/lib/tc211/termbase/terminology_sheet.rb +84 -74
- data/lib/tc211/termbase/terms_section.rb +9 -8
- data/lib/tc211/termbase/version.rb +1 -1
- data/lib/tc211/termbase.rb +18 -0
- data/tc211-termbase.gemspec +6 -6
- metadata +22 -39
- data/Gemfile.lock +0 -122
data/lib/tc211/termbase/term.rb
CHANGED
@@ -1,266 +1,389 @@
|
|
1
1
|
module Tc211::Termbase
|
2
|
+
class Term
|
3
|
+
INPUT_ATTRIBS = %i(
|
4
|
+
id
|
5
|
+
term
|
6
|
+
abbrev
|
7
|
+
synonyms
|
8
|
+
alt
|
9
|
+
definition
|
10
|
+
country_code
|
11
|
+
language_code
|
12
|
+
notes
|
13
|
+
examples
|
14
|
+
entry_status
|
15
|
+
classification
|
16
|
+
review_indicator
|
17
|
+
authoritative_source
|
18
|
+
authoritative_source_similarity
|
19
|
+
lineage_source
|
20
|
+
lineage_source_similarity
|
21
|
+
date_accepted
|
22
|
+
date_amended
|
23
|
+
review_date
|
24
|
+
review_status
|
25
|
+
review_type
|
26
|
+
review_decision
|
27
|
+
review_decision_date
|
28
|
+
review_decision_event
|
29
|
+
review_decision_notes
|
30
|
+
release
|
31
|
+
).freeze
|
32
|
+
|
33
|
+
OUTPUT_ATTRIBS = INPUT_ATTRIBS - %i(term alt abbrev synonyms classification) + %i(terms)
|
34
|
+
|
35
|
+
attr_accessor *(INPUT_ATTRIBS | OUTPUT_ATTRIBS)
|
36
|
+
|
37
|
+
def initialize(options = {})
|
38
|
+
@examples = []
|
39
|
+
@notes = []
|
40
|
+
@definition = []
|
41
|
+
|
42
|
+
# puts "options #{options.inspect}"
|
43
|
+
|
44
|
+
options.each_pair do |k, v|
|
45
|
+
v = v.strip if v.is_a?(String)
|
46
|
+
next unless v
|
47
|
+
|
48
|
+
case k
|
49
|
+
when /^example/
|
50
|
+
add_example(v)
|
51
|
+
when /^note/
|
52
|
+
add_note(v)
|
53
|
+
else
|
54
|
+
# puts"Key #{k}"
|
55
|
+
key = k.gsub("-", "_")
|
56
|
+
send("#{key}=", v)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
self
|
60
|
+
end
|
61
|
+
|
62
|
+
STRIP_PUNCTUATION = [
|
63
|
+
":",
|
64
|
+
":",
|
65
|
+
".",
|
66
|
+
"–",
|
67
|
+
"\-",
|
68
|
+
].freeze
|
69
|
+
|
70
|
+
# WARNING
|
71
|
+
# Always put the longer Regexp match in front!
|
72
|
+
EXAMPLE_PREFIXES = {
|
73
|
+
# TODO: fix this, we should not have "EXAMPLES"
|
74
|
+
eng: ["EXAMPLES", "EXAMPLE"],
|
75
|
+
ara: "مثال",
|
76
|
+
chi: "示例",
|
77
|
+
dan: "EKSEMPEL",
|
78
|
+
dut: ["VOORBEELD", "VOORBEELDEN"],
|
79
|
+
fin: "ESIM",
|
80
|
+
fre: "Exemple",
|
81
|
+
# ger: "",
|
82
|
+
jpn: "例",
|
83
|
+
kor: "보기",
|
84
|
+
pol: "PRZYKŁAD",
|
85
|
+
may: "Contoh",
|
86
|
+
rus: "Пример",
|
87
|
+
spa: "Ejemplo",
|
88
|
+
swe: "Exempel",
|
89
|
+
}.freeze
|
90
|
+
|
91
|
+
# WARNING
|
92
|
+
# Always put the longer Regexp match in front!
|
93
|
+
NOTE_PREFIXES = {
|
94
|
+
eng: ["Note \\d to entry", "NOTE"],
|
95
|
+
ara: "ملاحظة",
|
96
|
+
chi: "注",
|
97
|
+
dan: "Note",
|
98
|
+
dut: "OPMERKING",
|
99
|
+
# Matches "HUOM", "HUOM.", "HUOM 1." and "HUOM. 1."
|
100
|
+
# (numeral added by the method)
|
101
|
+
fin: "HUOM\\.?",
|
102
|
+
fre: "A noter",
|
103
|
+
# ger: "",
|
104
|
+
jpn: "備考",
|
105
|
+
kor: "비고",
|
106
|
+
pol: "UWAGA",
|
107
|
+
may: "catatan",
|
108
|
+
rus: "нота",
|
109
|
+
spa: "Nota",
|
110
|
+
swe: ["Anm. \\d till termpost", "Anm. \\d till terpost", "Anm."],
|
111
|
+
}.freeze
|
112
|
+
|
113
|
+
# To match Chinese and Japanese numerals
|
114
|
+
ALL_FULL_HALF_WIDTH_NUMBERS = "[0-90-9]".freeze
|
115
|
+
|
116
|
+
SOURCE_STATUSES = {
|
117
|
+
1 => "identical",
|
118
|
+
2 => "restyle",
|
119
|
+
3 => "context_added",
|
120
|
+
4 => "generalisation",
|
121
|
+
5 => "specialisation",
|
122
|
+
6 => "unspecified",
|
123
|
+
}.freeze
|
124
|
+
|
125
|
+
def add_example(example)
|
126
|
+
c = clean_prefixed_string(example, EXAMPLE_PREFIXES)
|
127
|
+
@examples << c unless c.empty?
|
128
|
+
end
|
2
129
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
authoritative_source
|
14
|
-
authoritative_source_similarity
|
15
|
-
lineage_source
|
16
|
-
lineage_source_similarity
|
17
|
-
date_accepted
|
18
|
-
date_amended
|
19
|
-
review_date
|
20
|
-
review_status
|
21
|
-
review_type
|
22
|
-
review_decision
|
23
|
-
review_decision_date
|
24
|
-
review_decision_event
|
25
|
-
review_decision_notes
|
26
|
-
release
|
27
|
-
)
|
28
|
-
|
29
|
-
OUTPUT_ATTRIBS = INPUT_ATTRIBS - %i(term alt classification) + %i(terms)
|
30
|
-
|
31
|
-
attr_accessor *(INPUT_ATTRIBS | OUTPUT_ATTRIBS)
|
32
|
-
|
33
|
-
def initialize(options={})
|
34
|
-
@examples = []
|
35
|
-
@notes = []
|
36
|
-
|
37
|
-
# puts "options #{options.inspect}"
|
38
|
-
|
39
|
-
options.each_pair do |k, v|
|
40
|
-
v = v.strip if v.is_a?(String)
|
41
|
-
next unless v
|
42
|
-
case k
|
43
|
-
when /^example/
|
44
|
-
add_example(v)
|
45
|
-
when /^note/
|
46
|
-
add_note(v)
|
47
|
-
else
|
48
|
-
# puts"Key #{k}"
|
49
|
-
key = k.gsub("-", "_")
|
50
|
-
self.send("#{key}=", v)
|
130
|
+
def add_note(note)
|
131
|
+
c = clean_prefixed_string(note, NOTE_PREFIXES)
|
132
|
+
@notes << c unless c.empty?
|
133
|
+
end
|
134
|
+
|
135
|
+
def clean_prefixed_string(string, criterion_map)
|
136
|
+
carry = string.to_s.strip
|
137
|
+
criterion_map.values.flatten.each do |mat|
|
138
|
+
# Arabic notes/examples sometimes use parantheses around numbers
|
139
|
+
carry = carry.sub(carry_regex(mat), "")
|
51
140
|
end
|
141
|
+
|
142
|
+
carry
|
52
143
|
end
|
53
|
-
self
|
54
|
-
end
|
55
144
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
# Always put the longer Regexp match in front!
|
66
|
-
EXAMPLE_PREFIXES = {
|
67
|
-
# TODO: fix this, we should not have "EXAMPLES"
|
68
|
-
eng: ["EXAMPLES", "EXAMPLE"],
|
69
|
-
ara: "مثال",
|
70
|
-
chi: "示例",
|
71
|
-
dan: "EKSEMPEL",
|
72
|
-
dut: ["VOORBEELD", "VOORBEELDEN"],
|
73
|
-
fin: "ESIM",
|
74
|
-
fre: "Exemple",
|
75
|
-
# ger: "",
|
76
|
-
jpn: "例",
|
77
|
-
kor: "보기",
|
78
|
-
pol: "PRZYKŁAD",
|
79
|
-
may: "Contoh",
|
80
|
-
rus: "Пример",
|
81
|
-
spa: "Ejemplo",
|
82
|
-
swe: "Exempel"
|
83
|
-
}
|
84
|
-
|
85
|
-
# WARNING
|
86
|
-
# Always put the longer Regexp match in front!
|
87
|
-
NOTE_PREFIXES = {
|
88
|
-
eng: ["Note \\d to entry", "NOTE"],
|
89
|
-
ara: "ملاحظة",
|
90
|
-
chi: "注",
|
91
|
-
dan: "Note",
|
92
|
-
dut: "OPMERKING",
|
93
|
-
fin: "HUOM\\.?", # Matches "HUOM", "HUOM.", "HUOM 1." and "HUOM. 1." (numeral added by the method)
|
94
|
-
fre: "A noter",
|
95
|
-
# ger: "",
|
96
|
-
jpn: "備考",
|
97
|
-
kor: "비고",
|
98
|
-
pol: "UWAGA",
|
99
|
-
may: "catatan",
|
100
|
-
rus: "нота",
|
101
|
-
spa: "Nota",
|
102
|
-
swe: ["Anm. \\d till termpost", "Anm. \\d till terpost", "Anm."]
|
103
|
-
}
|
104
|
-
|
105
|
-
# To match Chinese and Japanese numerals
|
106
|
-
ALL_FULL_HALF_WIDTH_NUMBERS = "[0-90-9]"
|
107
|
-
|
108
|
-
def add_example(example)
|
109
|
-
c = clean_prefixed_string(example, EXAMPLE_PREFIXES)
|
110
|
-
@examples << c unless c.empty?
|
111
|
-
end
|
145
|
+
def carry_regex(mat)
|
146
|
+
Regexp.new(
|
147
|
+
[
|
148
|
+
"^#{mat}\s*[#{STRIP_PUNCTUATION.join}]?",
|
149
|
+
"\s*\\(?#{ALL_FULL_HALF_WIDTH_NUMBERS}*\\)?\s*",
|
150
|
+
"[#{STRIP_PUNCTUATION.join}]?\s*",
|
151
|
+
].join,
|
152
|
+
)
|
153
|
+
end
|
112
154
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
155
|
+
# The termid should ALWAYS be an integer.
|
156
|
+
# https://github.com/riboseinc/tc211-termbase/issues/1
|
157
|
+
def id=(newid)
|
158
|
+
@id = Integer(newid)
|
159
|
+
end
|
117
160
|
|
118
|
-
|
119
|
-
|
120
|
-
criterion_map.values.flatten.each do |mat|
|
121
|
-
# puts "example string: #{carry}, mat: #{mat}"
|
122
|
-
|
123
|
-
# puts "note string: #{carry}, mat: #{mat}"
|
124
|
-
# if @id == 318 and mat == "Nota" and string == "NOTA 1 Una operación tiene un nombre y una lista de parámetros."
|
125
|
-
# require "pry"
|
126
|
-
# binding.pry
|
127
|
-
# end
|
128
|
-
|
129
|
-
# Arabic notes/examples sometimes use parantheses around numbers
|
130
|
-
carry = carry.sub(
|
131
|
-
Regexp.new(
|
132
|
-
"^#{mat}\s*[#{STRIP_PUNCTUATION.join('')}]?" +
|
133
|
-
"\s*\\(?#{ALL_FULL_HALF_WIDTH_NUMBERS}*\\)?\s*"+
|
134
|
-
"[#{STRIP_PUNCTUATION.join('')}]?\s*",
|
135
|
-
Regexp::IGNORECASE
|
136
|
-
),
|
137
|
-
'')
|
161
|
+
def definition=(definition)
|
162
|
+
@definition << definition
|
138
163
|
end
|
139
164
|
|
140
|
-
|
141
|
-
|
165
|
+
def to_hash
|
166
|
+
OUTPUT_ATTRIBS.inject({}) do |acc, attrib|
|
167
|
+
value = send(attrib)
|
168
|
+
if value.nil?
|
169
|
+
acc
|
170
|
+
else
|
171
|
+
acc.merge(attrib.to_s => value)
|
172
|
+
end
|
173
|
+
end
|
174
|
+
end
|
142
175
|
|
176
|
+
# entry-status
|
177
|
+
## Must be one of notValid valid superseded retired
|
178
|
+
def entry_status=(value)
|
179
|
+
case value
|
180
|
+
when "有效的", "käytössä", "действующий", "válido"
|
181
|
+
value = "valid"
|
182
|
+
when "korvattu", "reemplazado"
|
183
|
+
value = "superseded"
|
184
|
+
when "информация отсутствует" # "information absent"!?
|
185
|
+
value = "retired"
|
186
|
+
when %w(notValid valid superseded retired) # do nothing
|
187
|
+
end
|
188
|
+
@entry_status = value
|
189
|
+
end
|
143
190
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
191
|
+
# classification
|
192
|
+
## Must be one of the following: preferred admitted deprecated
|
193
|
+
def classification=(value)
|
194
|
+
case value
|
195
|
+
when "", "认可的", "допустимый", "admitido", "adminitido"
|
196
|
+
value = "admitted"
|
197
|
+
when "首选的", "suositettava", "suositeltava", "рекомендуемый", "preferente"
|
198
|
+
value = "preferred"
|
199
|
+
when %w(preferred admitted deprecated)
|
200
|
+
# do nothing
|
201
|
+
end
|
202
|
+
@classification = value
|
203
|
+
end
|
149
204
|
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
205
|
+
# review-indicator
|
206
|
+
# Must be one of the following
|
207
|
+
# <empty field>
|
208
|
+
# Under Review in Source Document
|
209
|
+
def review_indicator=(value)
|
210
|
+
unless ["", "Under Review in Source Document"].include?(value)
|
211
|
+
value = ""
|
157
212
|
end
|
213
|
+
@review_indicator = value
|
158
214
|
end
|
159
|
-
end
|
160
215
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
value
|
171
|
-
|
172
|
-
|
216
|
+
# authoritative-source-similarity
|
217
|
+
# Must be one of the following codes:
|
218
|
+
# identical = 1
|
219
|
+
# restyled = 2
|
220
|
+
# context added = 3
|
221
|
+
# generalisation = 4
|
222
|
+
# specialisation = 5
|
223
|
+
# unspecified = 6
|
224
|
+
def authoritative_source_similarity=(value)
|
225
|
+
unless SOURCE_STATUSES.key?(value)
|
226
|
+
value = 6
|
227
|
+
end
|
228
|
+
@authoritative_source_similarity = value
|
173
229
|
end
|
174
|
-
@entry_status = value
|
175
|
-
end
|
176
230
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
value
|
187
|
-
|
188
|
-
|
231
|
+
# lineage-source-similarity
|
232
|
+
# Must be one of the following codes:
|
233
|
+
# identical = 1
|
234
|
+
# restyled = 2
|
235
|
+
# context added = 3
|
236
|
+
# generalisation = 4
|
237
|
+
# specialisation = 5
|
238
|
+
# unspecified = 6
|
239
|
+
def lineage_source_similarity=(value)
|
240
|
+
unless SOURCE_STATUSES.key?(value)
|
241
|
+
value = 6
|
242
|
+
end
|
243
|
+
@lineage_source_similarity = value
|
189
244
|
end
|
190
|
-
@classification = value
|
191
|
-
end
|
192
245
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
246
|
+
## value Must be one of pending tentative final
|
247
|
+
def review_status=(value)
|
248
|
+
unless ["", "pending", "tentative", "final"].include?(value)
|
249
|
+
value = ""
|
250
|
+
end
|
251
|
+
@review_status = value
|
198
252
|
end
|
199
|
-
@review_indicator = value
|
200
|
-
end
|
201
253
|
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
254
|
+
## value Must be one of supersession, retirement
|
255
|
+
def review_type=(value)
|
256
|
+
unless ["", "supersession", "retirement"].include?(value)
|
257
|
+
value = ""
|
258
|
+
end
|
259
|
+
@review_type = value
|
207
260
|
end
|
208
|
-
@authoritative_source_similarity = value
|
209
|
-
end
|
210
261
|
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
262
|
+
## value Must be one of withdrawn, accepted notAccepted
|
263
|
+
def review_decision=(value)
|
264
|
+
unless ["", "withdrawn", "accepted", "notAccepted"].include?(value)
|
265
|
+
value = ""
|
266
|
+
end
|
267
|
+
@review_decision = value
|
216
268
|
end
|
217
|
-
@authoritative_source_similarity
|
218
|
-
end
|
219
269
|
|
220
|
-
|
221
|
-
|
222
|
-
value = ""
|
270
|
+
def retired?
|
271
|
+
release >= 0
|
223
272
|
end
|
224
|
-
@review_status = value
|
225
|
-
end
|
226
273
|
|
227
|
-
|
228
|
-
|
229
|
-
|
274
|
+
def terms
|
275
|
+
[
|
276
|
+
primary_term_hash,
|
277
|
+
alt_term_hash,
|
278
|
+
abbreviation_term_hash,
|
279
|
+
synonyms_term_hash,
|
280
|
+
].compact
|
230
281
|
end
|
231
|
-
@review_type = value
|
232
|
-
end
|
233
282
|
|
234
|
-
|
235
|
-
|
236
|
-
|
283
|
+
def primary_term_hash
|
284
|
+
return unless term
|
285
|
+
|
286
|
+
{
|
287
|
+
"type" => "expression",
|
288
|
+
"designation" => term,
|
289
|
+
"normative_status" => classification,
|
290
|
+
}
|
237
291
|
end
|
238
|
-
@review_decision = value
|
239
|
-
end
|
240
292
|
|
241
|
-
|
242
|
-
|
243
|
-
end
|
293
|
+
def alt_term_hash
|
294
|
+
return unless alt
|
244
295
|
|
245
|
-
|
246
|
-
|
247
|
-
|
296
|
+
{
|
297
|
+
"type" => "expression",
|
298
|
+
"designation" => alt,
|
299
|
+
"normative_status" => classification,
|
300
|
+
}
|
301
|
+
end
|
248
302
|
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
303
|
+
def abbreviation_term_hash
|
304
|
+
return unless abbrev
|
305
|
+
|
306
|
+
{
|
307
|
+
"type" => "abbreviation",
|
308
|
+
"designation" => abbrev,
|
309
|
+
}
|
310
|
+
end
|
311
|
+
|
312
|
+
def synonyms_term_hash
|
313
|
+
return unless synonyms
|
314
|
+
|
315
|
+
{
|
316
|
+
"type" => "expression",
|
317
|
+
"designation" => synonyms,
|
318
|
+
}
|
319
|
+
end
|
320
|
+
|
321
|
+
def sources_hash
|
322
|
+
[
|
323
|
+
authoritative_source_hash,
|
324
|
+
lineage_source_hash,
|
325
|
+
].compact
|
326
|
+
end
|
256
327
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
328
|
+
def authoritative_source_hash
|
329
|
+
return unless authoritative_source
|
330
|
+
|
331
|
+
{
|
332
|
+
origin: {
|
333
|
+
link: authoritative_source["link"],
|
334
|
+
ref: authoritative_source["ref"],
|
335
|
+
clause: authoritative_source["clause"],
|
336
|
+
},
|
337
|
+
type: "authoritative",
|
338
|
+
status: SOURCE_STATUSES[authoritative_source_similarity],
|
339
|
+
}
|
340
|
+
end
|
341
|
+
|
342
|
+
def authoritative_source_array
|
343
|
+
return unless authoritative_source
|
344
|
+
|
345
|
+
[
|
346
|
+
"link" => authoritative_source["link"],
|
347
|
+
]
|
348
|
+
end
|
349
|
+
|
350
|
+
def lineage_source_hash
|
351
|
+
return unless lineage_source
|
352
|
+
|
353
|
+
{
|
354
|
+
origin: {
|
355
|
+
ref: lineage_source,
|
356
|
+
},
|
357
|
+
type: "lineage",
|
358
|
+
status: SOURCE_STATUSES[lineage_source_similarity],
|
359
|
+
}
|
360
|
+
end
|
361
|
+
|
362
|
+
def to_localized_concept_hash
|
363
|
+
concept_hash = to_hash
|
364
|
+
|
365
|
+
%w[
|
366
|
+
review_status
|
367
|
+
review_decision
|
368
|
+
review_decision_notes
|
369
|
+
review_indicator
|
370
|
+
authoritative_source
|
371
|
+
authoritative_source_similarity
|
372
|
+
lineage_source
|
373
|
+
lineage_source_similarity
|
374
|
+
country_code
|
375
|
+
].each do |key|
|
376
|
+
concept_hash.delete(key)
|
377
|
+
end
|
378
|
+
|
379
|
+
concept_hash["id"] = concept_hash["id"].to_s
|
380
|
+
concept_hash["sources"] = sources_hash
|
381
|
+
|
382
|
+
if authoritative_source_array
|
383
|
+
concept_hash["authoritativeSource"] = authoritative_source_array
|
384
|
+
end
|
385
|
+
|
386
|
+
concept_hash
|
387
|
+
end
|
263
388
|
end
|
264
389
|
end
|
265
|
-
|
266
|
-
end
|