tc211-termbase 0.2.1 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,266 +1,377 @@
1
1
  module Tc211::Termbase
2
+ class Term
3
+ INPUT_ATTRIBS = %i(
4
+ id
5
+ term
6
+ abbrev
7
+ synonyms
8
+ alt
9
+ definition
10
+ country_code
11
+ language_code
12
+ notes
13
+ examples
14
+ entry_status
15
+ classification
16
+ review_indicator
17
+ authoritative_source
18
+ authoritative_source_similarity
19
+ lineage_source
20
+ lineage_source_similarity
21
+ date_accepted
22
+ date_amended
23
+ review_date
24
+ review_status
25
+ review_type
26
+ review_decision
27
+ review_decision_date
28
+ review_decision_event
29
+ review_decision_notes
30
+ release
31
+ ).freeze
32
+
33
+ OUTPUT_ATTRIBS = INPUT_ATTRIBS - %i(term alt abbrev synonyms classification) + %i(terms)
34
+
35
+ attr_accessor *(INPUT_ATTRIBS | OUTPUT_ATTRIBS)
36
+
37
# Builds a term from a hash of raw attributes.
#
# String values are stripped and nil/false values are skipped. Keys that
# start with "example" or "note" are routed to add_example / add_note;
# every other key has "-" replaced by "_" and is assigned through the
# setter of that name.
#
# @param options [Hash] attribute name (String or Symbol) => raw value
# @raise [NoMethodError] if a key has no matching public setter
def initialize(options = {})
  @examples = []
  @notes = []
  @definition = []

  options.each_pair do |name, raw|
    value = raw.is_a?(String) ? raw.strip : raw
    next unless value

    # Normalise the key once so Symbol keys are accepted as well
    # (Symbol does not respond to #gsub).
    key = name.to_s

    case key
    when /^example/
      add_example(value)
    when /^note/
      add_note(value)
    else
      setter = "#{key.tr('-', '_')}="
      # public_send: only public attribute writers may be reached from
      # the keys of the incoming hash.
      public_send(setter, value)
    end
  end
end
61
+
62
+ STRIP_PUNCTUATION = [
63
+ ":",
64
+ ":",
65
+ ".",
66
+ "–",
67
+ "\-",
68
+ ].freeze
69
+
70
+ # WARNING
71
+ # Always put the longer Regexp match in front!
72
+ EXAMPLE_PREFIXES = {
73
+ # TODO: fix this, we should not have "EXAMPLES"
74
+ eng: ["EXAMPLES", "EXAMPLE"],
75
+ ara: "مثال",
76
+ chi: "示例",
77
+ dan: "EKSEMPEL",
78
+ dut: ["VOORBEELD", "VOORBEELDEN"],
79
+ fin: "ESIM",
80
+ fre: "Exemple",
81
+ # ger: "",
82
+ jpn: "例",
83
+ kor: "보기",
84
+ pol: "PRZYKŁAD",
85
+ may: "Contoh",
86
+ rus: "Пример",
87
+ spa: "Ejemplo",
88
+ swe: "Exempel",
89
+ }.freeze
90
+
91
+ # WARNING
92
+ # Always put the longer Regexp match in front!
93
+ NOTE_PREFIXES = {
94
+ eng: ["Note \\d to entry", "NOTE"],
95
+ ara: "ملاحظة",
96
+ chi: "注",
97
+ dan: "Note",
98
+ dut: "OPMERKING",
99
+ # Matches "HUOM", "HUOM.", "HUOM 1." and "HUOM. 1."
100
+ # (numeral added by the method)
101
+ fin: "HUOM\\.?",
102
+ fre: "A noter",
103
+ # ger: "",
104
+ jpn: "備考",
105
+ kor: "비고",
106
+ pol: "UWAGA",
107
+ may: "catatan",
108
+ rus: "нота",
109
+ spa: "Nota",
110
+ swe: ["Anm. \\d till termpost", "Anm. \\d till terpost", "Anm."],
111
+ }.freeze
112
+
113
+ # To match Chinese and Japanese numerals
114
+ ALL_FULL_HALF_WIDTH_NUMBERS = "[0-90-9]".freeze
115
+
116
+ SOURCE_STATUSES = {
117
+ 1 => "identical",
118
+ 2 => "restyle",
119
+ 3 => "context_added",
120
+ 4 => "generalisation",
121
+ 5 => "specialisation",
122
+ 6 => "unspecified",
123
+ }.freeze
124
+
125
+ def add_example(example)
126
+ c = clean_prefixed_string(example, EXAMPLE_PREFIXES)
127
+ @examples << c unless c.empty?
128
+ end
2
129
 
3
- class Term
4
-
5
- INPUT_ATTRIBS = %i(
6
- id term abbrev synonyms alt definition
7
- country_code
8
- language_code
9
- notes examples
10
- entry_status
11
- classification
12
- review_indicator
13
- authoritative_source
14
- authoritative_source_similarity
15
- lineage_source
16
- lineage_source_similarity
17
- date_accepted
18
- date_amended
19
- review_date
20
- review_status
21
- review_type
22
- review_decision
23
- review_decision_date
24
- review_decision_event
25
- review_decision_notes
26
- release
27
- )
28
-
29
- OUTPUT_ATTRIBS = INPUT_ATTRIBS - %i(term alt classification) + %i(terms)
30
-
31
- attr_accessor *(INPUT_ATTRIBS | OUTPUT_ATTRIBS)
32
-
33
- def initialize(options={})
34
- @examples = []
35
- @notes = []
36
-
37
- # puts "options #{options.inspect}"
38
-
39
- options.each_pair do |k, v|
40
- v = v.strip if v.is_a?(String)
41
- next unless v
42
- case k
43
- when /^example/
44
- add_example(v)
45
- when /^note/
46
- add_note(v)
47
- else
48
- # puts"Key #{k}"
49
- key = k.gsub("-", "_")
50
- self.send("#{key}=", v)
130
+ def add_note(note)
131
+ c = clean_prefixed_string(note, NOTE_PREFIXES)
132
+ @notes << c unless c.empty?
133
+ end
134
+
135
+ def clean_prefixed_string(string, criterion_map)
136
+ carry = string.to_s.strip
137
+ criterion_map.values.flatten.each do |mat|
138
+ # Arabic notes/examples sometimes use parentheses around numbers
139
+ carry = carry.sub(carry_regex(mat), "")
51
140
  end
141
+
142
+ carry
52
143
  end
53
- self
54
- end
55
144
 
56
- STRIP_PUNCTUATION = [
57
- ":",
58
- ":",
59
- ".",
60
- "",
61
- "\-"
62
- ]
63
-
64
- # WARNING
65
- # Always put the longer Regexp match in front!
66
- EXAMPLE_PREFIXES = {
67
- # TODO: fix this, we should not have "EXAMPLES"
68
- eng: ["EXAMPLES", "EXAMPLE"],
69
- ara: "مثال",
70
- chi: "示例",
71
- dan: "EKSEMPEL",
72
- dut: ["VOORBEELD", "VOORBEELDEN"],
73
- fin: "ESIM",
74
- fre: "Exemple",
75
- # ger: "",
76
- jpn: "例",
77
- kor: "보기",
78
- pol: "PRZYKŁAD",
79
- may: "Contoh",
80
- rus: "Пример",
81
- spa: "Ejemplo",
82
- swe: "Exempel"
83
- }
84
-
85
- # WARNING
86
- # Always put the longer Regexp match in front!
87
- NOTE_PREFIXES = {
88
- eng: ["Note \\d to entry", "NOTE"],
89
- ara: "ملاحظة",
90
- chi: "注",
91
- dan: "Note",
92
- dut: "OPMERKING",
93
- fin: "HUOM\\.?", # Matches "HUOM", "HUOM.", "HUOM 1." and "HUOM. 1." (numeral added by the method)
94
- fre: "A noter",
95
- # ger: "",
96
- jpn: "備考",
97
- kor: "비고",
98
- pol: "UWAGA",
99
- may: "catatan",
100
- rus: "нота",
101
- spa: "Nota",
102
- swe: ["Anm. \\d till termpost", "Anm. \\d till terpost", "Anm."]
103
- }
104
-
105
- # To match Chinese and Japanese numerals
106
- ALL_FULL_HALF_WIDTH_NUMBERS = "[0-90-9]"
107
-
108
- def add_example(example)
109
- c = clean_prefixed_string(example, EXAMPLE_PREFIXES)
110
- @examples << c unless c.empty?
111
- end
145
# Builds the regexp that matches one localized NOTE/EXAMPLE prefix at the
# very start of a string so that it can be stripped.
#
# The pattern is, in order: the prefix itself, an optional punctuation
# character from STRIP_PUNCTUATION, an optional number (optionally wrapped
# in parentheses) made of ALL_FULL_HALF_WIDTH_NUMBERS digits, and another
# optional punctuation character, with whitespace allowed in between.
#
# @param mat [String] regexp source for the prefix (not escaped, so it may
#   contain fragments such as "\\d")
# @return [Regexp] case-insensitive pattern anchored at the string start
def carry_regex(mat)
  # Escape each character so the class stays valid whatever the order of
  # STRIP_PUNCTUATION (an unescaped "-" in the middle would form a range).
  punctuation = "[#{STRIP_PUNCTUATION.uniq.map { |c| Regexp.escape(c) }.join}]?"

  Regexp.new(
    [
      # "\\s" is the regexp whitespace class; "\s" inside a double-quoted
      # Ruby string is just a literal space. "\\A" anchors at the start of
      # the string, whereas "^" would also match after any newline.
      "\\A#{mat}\\s*#{punctuation}",
      "\\s*\\(?#{ALL_FULL_HALF_WIDTH_NUMBERS}*\\)?\\s*",
      "#{punctuation}\\s*",
    ].join,
    # Prefixes are listed in one casing (e.g. "Nota") but the data also
    # contains other casings (e.g. "NOTA 1 ...").
    Regexp::IGNORECASE,
  )
end
112
154
 
113
- def add_note(note)
114
- c = clean_prefixed_string(note, NOTE_PREFIXES)
115
- @notes << c unless c.empty?
116
- end
155
+ # The termid should ALWAYS be an integer.
156
+ # https://github.com/riboseinc/tc211-termbase/issues/1
157
+ def id=(newid)
158
+ @id = Integer(newid)
159
+ end
117
160
 
118
- def clean_prefixed_string(string, criterion_map)
119
- carry = string.strip
120
- criterion_map.values.flatten.each do |mat|
121
- # puts "example string: #{carry}, mat: #{mat}"
122
-
123
- # puts "note string: #{carry}, mat: #{mat}"
124
- # if @id == 318 and mat == "Nota" and string == "NOTA 1 Una operación tiene un nombre y una lista de parámetros."
125
- # require "pry"
126
- # binding.pry
127
- # end
128
-
129
- # Arabic notes/examples sometimes use parantheses around numbers
130
- carry = carry.sub(
131
- Regexp.new(
132
- "^#{mat}\s*[#{STRIP_PUNCTUATION.join('')}]?" +
133
- "\s*\\(?#{ALL_FULL_HALF_WIDTH_NUMBERS}*\\)?\s*"+
134
- "[#{STRIP_PUNCTUATION.join('')}]?\s*",
135
- Regexp::IGNORECASE
136
- ),
137
- '')
161
+ def definition=(definition)
162
+ @definition << definition
138
163
  end
139
164
 
140
- carry
141
- end
165
+ def to_hash
166
+ OUTPUT_ATTRIBS.inject({}) do |acc, attrib|
167
+ value = send(attrib)
168
+ if value.nil?
169
+ acc
170
+ else
171
+ acc.merge(attrib.to_s => value)
172
+ end
173
+ end
174
+ end
142
175
 
176
# entry-status
#
# Stores the entry status, translating the known localized labels to their
# English value. The expected values are: notValid, valid, superseded,
# retired. Any value that is not one of the localized labels below is
# stored unchanged; no validation is performed.
#
# @param value [String, nil] raw status read from the input
def entry_status=(value)
  @entry_status =
    case value
    when "有效的", "käytössä", "действующий", "válido"
      "valid"
    when "korvattu", "reemplazado"
      "superseded"
    when "информация отсутствует" # "information absent"!?
      "retired"
    else
      # Includes the English values themselves, which need no translation.
      value
    end
end
143
190
 
144
- # The termid should ALWAYS be an integer.
145
- # https://github.com/riboseinc/tc211-termbase/issues/1
146
- def id=(newid)
147
- @id = Integer(newid)
148
- end
191
# classification
#
# Stores the classification, translating the known localized labels (and
# the empty string) to their English value. The expected values are:
# preferred, admitted, deprecated. Any value that is not one of the labels
# below is stored unchanged; no validation is performed.
#
# @param value [String, nil] raw classification read from the input
def classification=(value)
  @classification =
    case value
    when "", "认可的", "допустимый", "admitido", "adminitido"
      # An empty classification is treated as "admitted".
      "admitted"
    when "首选的", "suositettava", "suositeltava", "рекомендуемый",
         "preferente"
      "preferred"
    else
      # Includes the English values themselves, which need no translation.
      value
    end
end
149
204
 
150
- def to_hash
151
- OUTPUT_ATTRIBS.inject({}) do |acc, attrib|
152
- value = self.send(attrib)
153
- unless value.nil?
154
- acc.merge(attrib.to_s => value)
155
- else
156
- acc
205
+ # review-indicator
206
+ # Must be one of the following
207
+ # <empty field>
208
+ # Under Review in Source Document
209
+ def review_indicator=(value)
210
+ unless ["", "Under Review in Source Document"].include?(value)
211
+ value = ""
157
212
  end
213
+ @review_indicator = value
158
214
  end
159
- end
160
215
 
161
- # entry-status
162
- ## Must be one of notValid valid superseded retired
163
- def entry_status=(value)
164
- case value
165
- when "有效的", "käytössä", "действующий", "válido"
166
- value = "valid"
167
- when "korvattu", "reemplazado"
168
- value = "superseded"
169
- when "информация отсутствует" # "information absent"!?
170
- value = "retired"
171
- when %w(notValid valid superseded retired)
172
- # do nothing
216
+ # authoritative-source-similarity
217
+ # Must be one of the following codes:
218
+ # identical = 1
219
+ # restyled = 2
220
+ # context added = 3
221
+ # generalisation = 4
222
+ # specialisation = 5
223
+ # unspecified = 6
224
+ def authoritative_source_similarity=(value)
225
+ unless SOURCE_STATUSES.key?(value)
226
+ value = 6
227
+ end
228
+ @authoritative_source_similarity = value
173
229
  end
174
- @entry_status = value
175
- end
176
230
 
177
- # classification
178
- ## Must be one of the following: preferred admitted deprecated
179
- def classification=(value)
180
- case value
181
- when ""
182
- value = "admitted"
183
- when "认可的", "допустимый", "admitido"
184
- value = "admitted"
185
- when "首选的", "suositettava", "suositeltava", "рекомендуемый", "preferente"
186
- value = "preferred"
187
- when %w(preferred admitted deprecated)
188
- # do nothing
231
+ # lineage-source-similarity
232
+ # Must be one of the following codes:
233
+ # identical = 1
234
+ # restyled = 2
235
+ # context added = 3
236
+ # generalisation = 4
237
+ # specialisation = 5
238
+ # unspecified = 6
239
+ def lineage_source_similarity=(value)
240
+ unless SOURCE_STATUSES.key?(value)
241
+ value = 6
242
+ end
243
+ @lineage_source_similarity = value
189
244
  end
190
- @classification = value
191
- end
192
245
 
193
- # review-indicator
194
- ## Must be one of the following <empty field> Under Review in Source Document",
195
- def review_indicator=(value)
196
- unless ["", "Under Review in Source Document"].include?(value)
197
- value = ""
246
+ ## value Must be one of pending tentative final
247
+ def review_status=(value)
248
+ unless ["", "pending", "tentative", "final"].include?(value)
249
+ value = ""
250
+ end
251
+ @review_status = value
252
+ end
253
+
254
+ ## value Must be one of supersession, retirement
255
+ def review_type=(value)
256
+ unless ["", "supersession", "retirement"].include?(value)
257
+ value = ""
258
+ end
259
+ @review_type = value
198
260
  end
199
- @review_indicator = value
200
- end
201
261
 
202
- # authoritative-source-similarity
203
- # ## Must be one of the following codes: identical = 1 restyled = 2 context added = 3 generalisation = 4 specialisation = 5 unspecified = 6",
204
- def authoritative_source_similarity=(value)
205
- unless (1..6).include?(value)
206
- value = 6
262
+ ## value Must be one of withdrawn, accepted notAccepted
263
+ def review_decision=(value)
264
+ unless ["", "withdrawn", "accepted", "notAccepted"].include?(value)
265
+ value = ""
266
+ end
267
+ @review_decision = value
207
268
  end
208
- @authoritative_source_similarity = value
209
- end
210
269
 
211
- # lineage-source-similarity
212
- # ## Must be one of the following codes: identical = 1 restyled = 2 context added = 3 generalisation = 4 specialisation = 5 unspecified = 6",
213
- def authoritative_source_similarity=(value)
214
- unless (1..6).include?(value)
215
- value = 6
270
+ def retired?
271
+ release >= 0
216
272
  end
217
- @authoritative_source_similarity
218
- end
219
273
 
220
- def review_status=(value) ## Must be one of pending tentative final
221
- unless ["", "pending", "tentative", "final"].include?(value)
222
- value = ""
274
+ def terms
275
+ [
276
+ primary_term_hash,
277
+ alt_term_hash,
278
+ abbreviation_term_hash,
279
+ synonyms_term_hash,
280
+ ].compact
223
281
  end
224
- @review_status = value
225
- end
226
282
 
227
- def review_type=(value) ## Must be one of supersession, retirement
228
- unless ["", "supersession", "retirement"].include?(value)
229
- value = ""
283
+ def primary_term_hash
284
+ return unless term
285
+
286
+ {
287
+ "type" => "expression",
288
+ "designation" => term,
289
+ "normative_status" => classification,
290
+ }
230
291
  end
231
- @review_type = value
232
- end
233
292
 
234
- def review_decision=(value) ## Must be one of withdrawn, accepted notAccepted
235
- unless ["", "withdrawn", "accepted", "notAccepted"].include?(value)
236
- value = ""
293
+ def alt_term_hash
294
+ return unless alt
295
+
296
+ {
297
+ "type" => "expression",
298
+ "designation" => alt,
299
+ "normative_status" => classification,
300
+ }
237
301
  end
238
- @review_decision = value
239
- end
240
302
 
241
- def retired?
242
- release >= 0
243
- end
303
+ def abbreviation_term_hash
304
+ return unless abbrev
244
305
 
245
- def terms
246
- [primary_term_hash, alt_term_hash].compact
247
- end
306
+ {
307
+ "type" => "abbreviation",
308
+ "designation" => abbrev,
309
+ }
310
+ end
248
311
 
249
- def primary_term_hash
250
- {
251
- "type" => "expression",
252
- "designation" => term,
253
- "normative_status" => classification,
254
- } if term
255
- end
312
+ def synonyms_term_hash
313
+ return unless synonyms
256
314
 
257
- def alt_term_hash
258
- {
259
- "type" => "expression",
260
- "designation" => alt,
261
- "normative_status" => classification,
262
- } if alt
315
+ {
316
+ "type" => "expression",
317
+ "designation" => synonyms,
318
+ }
319
+ end
320
+
321
+ def sources_hash
322
+ [
323
+ authoritative_source_hash,
324
+ lineage_source_hash,
325
+ ].compact
326
+ end
327
+
328
+ def authoritative_source_hash
329
+ return unless authoritative_source
330
+
331
+ {
332
+ origin: {
333
+ link: authoritative_source["link"],
334
+ ref: authoritative_source["ref"],
335
+ clause: authoritative_source["clause"],
336
+ },
337
+ type: "authoritative",
338
+ status: SOURCE_STATUSES[authoritative_source_similarity],
339
+ }
340
+ end
341
+
342
+ def lineage_source_hash
343
+ return unless lineage_source
344
+
345
+ {
346
+ origin: {
347
+ ref: lineage_source,
348
+ },
349
+ type: "lineage",
350
+ status: SOURCE_STATUSES[lineage_source_similarity],
351
+ }
352
+ end
353
+
354
+ def to_localized_concept_hash
355
+ localized_concept_hash = to_hash
356
+
357
+ %w[
358
+ review_status
359
+ review_decision
360
+ review_decision_notes
361
+ review_indicator
362
+ authoritative_source
363
+ authoritative_source_similarity
364
+ lineage_source
365
+ lineage_source_similarity
366
+ country_code
367
+ ].each do |key|
368
+ localized_concept_hash.delete(key)
369
+ end
370
+
371
+ localized_concept_hash["id"] = localized_concept_hash["id"].to_s
372
+ localized_concept_hash["sources"] = sources_hash
373
+
374
+ localized_concept_hash
375
+ end
263
376
  end
264
377
  end
265
-
266
- end
@@ -6,48 +6,47 @@ require_relative "information_sheet"
6
6
  require_relative "terminology_sheet"
7
7
 
8
8
  module Tc211::Termbase
9
+ class TermWorkbook
10
+ attr_accessor :workbook, :glossary_info, :languages, :filename
11
+
12
+ SPECIAL_SHEETS = [
13
+ "Glossary Information",
14
+ "Character Encoding Spreadsheet",
15
+ ].freeze
16
+
17
+ def initialize(filepath)
18
+ @filename = filepath
19
+ @workbook = Creek::Book.new(filepath)
20
+ @glossary_info = InformationSheet.new(
21
+ find_sheet_by_name("Glossary Information"),
22
+ )
23
+ @languages = languages_supported
24
+ self
25
+ end
9
26
 
10
- class TermWorkbook
11
- attr_accessor :workbook
12
- attr_accessor :glossary_info
13
- attr_accessor :languages
14
- attr_accessor :filename
15
-
16
- SPECIAL_SHEETS = [
17
- "Glossary Information",
18
- "Character Encoding Spreadsheet"
19
- ]
20
-
21
- def initialize(filepath)
22
- @filename = filepath
23
- @workbook = Creek::Book.new(filepath)
24
- @glossary_info = InformationSheet.new(find_sheet_by_name("Glossary Information"))
25
- @languages = languages_supported
26
- self
27
- end
28
-
29
- def languages_supported
30
- @workbook.sheets.map(&:name).reject! do |name|
31
- SPECIAL_SHEETS.include?(name)
27
# Lists the names of the language sheets in the workbook, i.e. every sheet
# name that is not listed in SPECIAL_SHEETS.
#
# @return [Array<String>] sheet names in workbook order (possibly empty)
def languages_supported
  # Non-bang #reject on purpose: #reject! returns nil when no element was
  # removed, which would make this method return nil for a workbook that
  # contains none of the special sheets.
  @workbook.sheets.map(&:name).reject do |name|
    SPECIAL_SHEETS.include?(name)
  end
end
33
- end
34
32
 
35
- def language_sheet(lang)
36
- raise unless @languages.include?(lang)
37
- TerminologySheet.new(find_sheet_by_name(lang))
38
- end
33
+ def language_sheet(lang)
34
+ raise unless @languages.include?(lang)
39
35
 
40
- def find_sheet_by_name(sheet_name)
41
- @workbook.sheets.detect do |sheet|
42
- sheet.name == sheet_name
36
+ TerminologySheet.new(find_sheet_by_name(lang))
37
+ end
38
+
39
+ def find_sheet_by_name(sheet_name)
40
+ @workbook.sheets.detect do |sheet|
41
+ sheet.name == sheet_name
42
+ end
43
43
  end
44
- end
45
44
 
46
- def write_glossary_info
47
- glossary_info_fn = Pathname.new(@filename).sub_ext(".yaml")
48
- File.open(glossary_info_fn,"w") do |file|
49
- file.write(glossary_info.to_yaml)
45
+ def write_glossary_info
46
+ glossary_info_fn = Pathname.new(@filename).sub_ext(".yaml")
47
+ File.open(glossary_info_fn, "w") do |file|
48
+ file.write(glossary_info.to_yaml)
49
+ end
50
50
  end
51
51
  end
52
52
  end
53
- end