tc211-termbase 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,266 +1,377 @@
1
1
  module Tc211::Termbase
2
# A single terminology entry, built from a Hash of raw attribute values.
#
# Keys of the input Hash are mapped to attribute writers ("-" is treated
# as "_"); keys starting with "example" or "note" are routed to
# #add_example / #add_note, which strip the localized "EXAMPLE 1:" /
# "NOTE 1:" style prefixes. Several writers normalise localized or
# unexpected values (see the individual methods).
class Term
  # Attribute names accepted as input.
  INPUT_ATTRIBS = %i[
    id
    term
    abbrev
    synonyms
    alt
    definition
    country_code
    language_code
    notes
    examples
    entry_status
    classification
    review_indicator
    authoritative_source
    authoritative_source_similarity
    lineage_source
    lineage_source_similarity
    date_accepted
    date_amended
    review_date
    review_status
    review_type
    review_decision
    review_decision_date
    review_decision_event
    review_decision_notes
    release
  ].freeze

  # Attribute names emitted by #to_hash. The individual designation
  # attributes are replaced by the aggregated "terms" list.
  OUTPUT_ATTRIBS = (
    INPUT_ATTRIBS - %i[term alt abbrev synonyms classification] + %i[terms]
  ).freeze

  # Parenthesised so that "*" is unambiguously a splat.
  attr_accessor(*(INPUT_ATTRIBS | OUTPUT_ATTRIBS))

  # Punctuation that may follow a note/example prefix or its number.
  # NOTE(review): this list previously contained ":" twice; the duplicate
  # is assumed to have been the full-width colon used in CJK text and is
  # written as such here -- confirm against the upstream file.
  STRIP_PUNCTUATION = [
    ":",
    "：",
    ".",
    "–",
    "-",
  ].freeze

  # Regexp character class matching one STRIP_PUNCTUATION character.
  # Each entry is escaped so that "." and "-" are literal inside "[...]".
  PUNCTUATION_CLASS =
    "[#{STRIP_PUNCTUATION.map { |char| Regexp.escape(char) }.join}]".freeze
  private_constant :PUNCTUATION_CLASS

  # WARNING
  # Always put the longer Regexp match in front!
  # Values are Regexp source fragments, not literal strings.
  EXAMPLE_PREFIXES = {
    # TODO: fix this, we should not have "EXAMPLES"
    eng: ["EXAMPLES", "EXAMPLE"],
    ara: "مثال",
    chi: "示例",
    dan: "EKSEMPEL",
    # Longer form first, otherwise "VOORBEELDEN 1: x" leaves "EN 1: x".
    dut: ["VOORBEELDEN", "VOORBEELD"],
    fin: "ESIM",
    fre: "Exemple",
    # ger: "",
    jpn: "例",
    kor: "보기",
    pol: "PRZYKŁAD",
    may: "Contoh",
    rus: "Пример",
    spa: "Ejemplo",
    swe: "Exempel",
  }.freeze

  # WARNING
  # Always put the longer Regexp match in front!
  # Values are Regexp source fragments, not literal strings.
  NOTE_PREFIXES = {
    eng: ["Note \\d to entry", "NOTE"],
    ara: "ملاحظة",
    chi: "注",
    dan: "Note",
    dut: "OPMERKING",
    # Matches "HUOM", "HUOM.", "HUOM 1." and "HUOM. 1."
    # (numeral added by the method)
    fin: "HUOM\\.?",
    fre: "A noter",
    # ger: "",
    jpn: "備考",
    kor: "비고",
    pol: "UWAGA",
    may: "catatan",
    rus: "нота",
    spa: "Nota",
    swe: ["Anm. \\d till termpost", "Anm. \\d till terpost", "Anm."],
  }.freeze

  # To match Chinese and Japanese numerals: ASCII digits plus the
  # full-width digits U+FF10..U+FF19.
  ALL_FULL_HALF_WIDTH_NUMBERS = "[0-9０-９]".freeze

  # Similarity code => status string used in the "sources" output.
  SOURCE_STATUSES = {
    1 => "identical",
    2 => "restyle",
    3 => "context_added",
    4 => "generalisation",
    5 => "specialisation",
    6 => "unspecified",
  }.freeze

  # Similarity code stored when an unknown code is assigned.
  UNSPECIFIED_SIMILARITY = 6

  # Localized entry-status value => canonical value.
  ENTRY_STATUS_TRANSLATIONS = {
    "有效的" => "valid",
    "käytössä" => "valid",
    "действующий" => "valid",
    "válido" => "valid",
    "korvattu" => "superseded",
    "reemplazado" => "superseded",
    "информация отсутствует" => "retired", # "information absent"!?
  }.freeze

  # Localized (or blank) classification value => canonical value.
  CLASSIFICATION_TRANSLATIONS = {
    "" => "admitted",
    "认可的" => "admitted",
    "допустимый" => "admitted",
    "admitido" => "admitted",
    "adminitido" => "admitted",
    "首选的" => "preferred",
    "suositettava" => "preferred",
    "suositeltava" => "preferred",
    "рекомендуемый" => "preferred",
    "preferente" => "preferred",
  }.freeze

  REVIEW_INDICATORS = ["", "Under Review in Source Document"].freeze
  REVIEW_STATUSES = ["", "pending", "tentative", "final"].freeze
  REVIEW_TYPES = ["", "supersession", "retirement"].freeze
  REVIEW_DECISIONS = ["", "withdrawn", "accepted", "notAccepted"].freeze

  # Keys removed from #to_hash when building the localized concept hash.
  LOCALIZED_CONCEPT_EXCLUDED_KEYS = %w[
    review_status
    review_decision
    review_decision_notes
    review_indicator
    authoritative_source
    authoritative_source_similarity
    lineage_source
    lineage_source_similarity
    country_code
  ].freeze

  # @param options [Hash] raw attribute values keyed by attribute name
  #   (String or Symbol; "-" in a key is treated as "_"). String values
  #   are stripped; nil/false values are skipped.
  # @raise [NoMethodError] if a key has no corresponding public writer
  def initialize(options = {})
    @examples = []
    @notes = []
    @definition = []

    options.each_pair do |key, value|
      value = value.strip if value.is_a?(String)
      next unless value

      case key
      when /^example/
        add_example(value)
      when /^note/
        add_note(value)
      else
        # to_s so Symbol keys work; public_send so only public writers
        # can be reached from input keys.
        public_send("#{key.to_s.tr('-', '_')}=", value)
      end
    end
  end

  # Strips a localized example prefix and records the example unless the
  # remaining text is empty.
  def add_example(example)
    cleaned = clean_prefixed_string(example, EXAMPLE_PREFIXES)
    @examples << cleaned unless cleaned.empty?
  end

  # Strips a localized note prefix and records the note unless the
  # remaining text is empty.
  def add_note(note)
    cleaned = clean_prefixed_string(note, NOTE_PREFIXES)
    @notes << cleaned unless cleaned.empty?
  end

  # Removes the first matching prefix (plus optional number and
  # punctuation) from the start of +string+.
  #
  # Only the first prefix that matches is removed: continuing with the
  # remaining prefixes would eat real content, e.g. the "Note" in
  # "Note 1 to entry: Note that ...".
  #
  # @param string [#to_s] raw note/example text
  # @param criterion_map [Hash] language => prefix fragment(s), tried in
  #   insertion order
  # @return [String] the stripped text without its prefix
  def clean_prefixed_string(string, criterion_map)
    text = string.to_s.strip
    criterion_map.values.flatten.each do |prefix|
      match = carry_regex(prefix).match(text)
      return match.post_match if match
    end

    text
  end

  # Builds the Regexp that matches +mat+ at the very start of a string,
  # followed by optional punctuation, an optional number (Arabic
  # notes/examples sometimes use parentheses around numbers), and
  # optional punctuation again, with optional whitespace in between.
  #
  # "\\s" is written with a doubled backslash on purpose: in a
  # double-quoted Ruby string "\s" is a literal space, not the regexp
  # whitespace class. "\\A" anchors at the start of the string, whereas
  # "^" would also match after every newline.
  #
  # @param mat [String] Regexp source fragment for the prefix
  # @return [Regexp]
  def carry_regex(mat)
    Regexp.new(
      [
        "\\A#{mat}\\s*#{PUNCTUATION_CLASS}?",
        "\\s*\\(?#{ALL_FULL_HALF_WIDTH_NUMBERS}*\\)?\\s*",
        "#{PUNCTUATION_CLASS}?\\s*",
      ].join
    )
  end

  # The termid should ALWAYS be an integer.
  # https://github.com/riboseinc/tc211-termbase/issues/1
  #
  # @raise [ArgumentError, TypeError] if +newid+ cannot be converted
  def id=(newid)
    @id = Integer(newid)
  end

  # Appends to the list of definitions (it does not replace the list).
  def definition=(definition)
    @definition << definition
  end

  # @return [Hash{String => Object}] every OUTPUT_ATTRIBS value that is
  #   not nil, keyed by attribute name
  def to_hash
    OUTPUT_ATTRIBS.each_with_object({}) do |attrib, hash|
      value = public_send(attrib)
      hash[attrib.to_s] = value unless value.nil?
    end
  end

  # entry-status
  ## Must be one of notValid valid superseded retired
  # Known localized values are translated; any other value is stored
  # unchanged (no validation is performed).
  def entry_status=(value)
    @entry_status = ENTRY_STATUS_TRANSLATIONS.fetch(value, value)
  end

  # classification
  ## Must be one of the following: preferred admitted deprecated
  # Blank and known localized values are translated; any other value is
  # stored unchanged (no validation is performed).
  def classification=(value)
    @classification = CLASSIFICATION_TRANSLATIONS.fetch(value, value)
  end

  # review-indicator
  # Must be one of the following
  # <empty field>
  # Under Review in Source Document
  # Anything else is stored as "".
  def review_indicator=(value)
    @review_indicator = allowed_or_blank(value, REVIEW_INDICATORS)
  end

  # authoritative-source-similarity
  # Must be one of the SOURCE_STATUSES codes (1..6); anything else is
  # stored as 6 (unspecified).
  # NOTE(review): codes are compared as Hash keys, so "1" or 1.0 count
  # as unknown -- confirm callers pass Integers.
  def authoritative_source_similarity=(value)
    @authoritative_source_similarity = normalize_similarity(value)
  end

  # lineage-source-similarity
  # Must be one of the SOURCE_STATUSES codes (1..6); anything else is
  # stored as 6 (unspecified).
  def lineage_source_similarity=(value)
    @lineage_source_similarity = normalize_similarity(value)
  end

  ## value Must be one of pending tentative final
  # Anything else is stored as "".
  def review_status=(value)
    @review_status = allowed_or_blank(value, REVIEW_STATUSES)
  end

  ## value Must be one of supersession, retirement
  # Anything else is stored as "".
  def review_type=(value)
    @review_type = allowed_or_blank(value, REVIEW_TYPES)
  end

  ## value Must be one of withdrawn, accepted notAccepted
  # Anything else is stored as "".
  def review_decision=(value)
    @review_decision = allowed_or_blank(value, REVIEW_DECISIONS)
  end

  # NOTE(review): raises NoMethodError when +release+ is nil, and it is
  # not obvious why a non-negative release means "retired" -- confirm.
  def retired?
    release >= 0
  end

  # @return [Array<Hash>] designation hashes for the primary term,
  #   alternative, abbreviation and synonyms, omitting absent ones
  def terms
    [
      primary_term_hash,
      alt_term_hash,
      abbreviation_term_hash,
      synonyms_term_hash,
    ].compact
  end

  # @return [Hash, nil] designation hash for +term+, nil when unset
  def primary_term_hash
    return unless term

    {
      "type" => "expression",
      "designation" => term,
      "normative_status" => classification,
    }
  end

  # @return [Hash, nil] designation hash for +alt+, nil when unset
  def alt_term_hash
    return unless alt

    {
      "type" => "expression",
      "designation" => alt,
      "normative_status" => classification,
    }
  end

  # @return [Hash, nil] designation hash for +abbrev+, nil when unset
  def abbreviation_term_hash
    return unless abbrev

    {
      "type" => "abbreviation",
      "designation" => abbrev,
    }
  end

  # @return [Hash, nil] designation hash for +synonyms+, nil when unset
  def synonyms_term_hash
    return unless synonyms

    {
      "type" => "expression",
      "designation" => synonyms,
    }
  end

  # @return [Array<Hash>] authoritative and lineage source hashes,
  #   omitting absent ones
  def sources_hash
    [
      authoritative_source_hash,
      lineage_source_hash,
    ].compact
  end

  # Indexes +authoritative_source+ with the keys "link", "ref" and
  # "clause" (presumably a Hash -- verify against the caller).
  #
  # @return [Hash, nil] nil when +authoritative_source+ is unset
  def authoritative_source_hash
    return unless authoritative_source

    {
      origin: {
        link: authoritative_source["link"],
        ref: authoritative_source["ref"],
        clause: authoritative_source["clause"],
      },
      type: "authoritative",
      status: SOURCE_STATUSES[authoritative_source_similarity],
    }
  end

  # @return [Hash, nil] nil when +lineage_source+ is unset
  def lineage_source_hash
    return unless lineage_source

    {
      origin: {
        ref: lineage_source,
      },
      type: "lineage",
      status: SOURCE_STATUSES[lineage_source_similarity],
    }
  end

  # Like #to_hash, but without the review/source/country keys, with the
  # id converted to a String, and with a "sources" list added.
  #
  # @return [Hash]
  def to_localized_concept_hash
    concept = to_hash.reject do |key, _value|
      LOCALIZED_CONCEPT_EXCLUDED_KEYS.include?(key)
    end

    concept["id"] = concept["id"].to_s
    concept["sources"] = sources_hash
    concept
  end

  private

  # Returns +value+ when it is one of +allowed+, otherwise "".
  def allowed_or_blank(value, allowed)
    allowed.include?(value) ? value : ""
  end

  # Returns +value+ when it is a known similarity code, otherwise the
  # "unspecified" code.
  def normalize_similarity(value)
    SOURCE_STATUSES.key?(value) ? value : UNSPECIFIED_SIMILARITY
  end
end
264
377
  end
265
-
266
- end
@@ -6,48 +6,47 @@ require_relative "information_sheet"
6
6
  require_relative "terminology_sheet"
7
7
 
8
8
  module Tc211::Termbase
9
# Wraps a termbase workbook opened with Creek::Book and gives access to
# its glossary information sheet and its per-language sheets.
class TermWorkbook
  attr_accessor :workbook, :glossary_info, :languages, :filename

  # Sheet names that are not language sheets.
  SPECIAL_SHEETS = [
    "Glossary Information",
    "Character Encoding Spreadsheet",
  ].freeze

  # @param filepath [String, Pathname] path handed to Creek::Book.new
  def initialize(filepath)
    @filename = filepath
    @workbook = Creek::Book.new(filepath)
    @glossary_info = InformationSheet.new(
      find_sheet_by_name("Glossary Information")
    )
    @languages = languages_supported
  end

  # Names of all sheets except SPECIAL_SHEETS.
  #
  # Uses the non-bang #reject: #reject! returns nil when nothing was
  # removed, which would make #language_sheet fail on a workbook that
  # contains no special sheets.
  #
  # @return [Array<String>]
  def languages_supported
    @workbook.sheets.map(&:name).reject do |name|
      SPECIAL_SHEETS.include?(name)
    end
  end

  # @param lang [String] a sheet name listed in #languages
  # @return [TerminologySheet] wrapper around the sheet named +lang+
  # @raise [RuntimeError] if +lang+ is not one of #languages
  def language_sheet(lang)
    unless @languages.include?(lang)
      raise "No language sheet named #{lang.inspect} in #{@filename}"
    end

    TerminologySheet.new(find_sheet_by_name(lang))
  end

  # @return [Object, nil] the first workbook sheet whose name equals
  #   +sheet_name+, or nil when there is none
  def find_sheet_by_name(sheet_name)
    @workbook.sheets.find { |sheet| sheet.name == sheet_name }
  end

  # Writes glossary_info.to_yaml next to the workbook, using the
  # workbook's path with its extension replaced by ".yaml".
  def write_glossary_info
    glossary_info_fn = Pathname.new(@filename).sub_ext(".yaml")
    File.write(glossary_info_fn, glossary_info.to_yaml)
  end
end
52
52
  end
53
- end