tc211-termbase 0.1.13 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,245 +1,377 @@
1
1
  module Tc211::Termbase
2
+ class Term
3
+ INPUT_ATTRIBS = %i(
4
+ id
5
+ term
6
+ abbrev
7
+ synonyms
8
+ alt
9
+ definition
10
+ country_code
11
+ language_code
12
+ notes
13
+ examples
14
+ entry_status
15
+ classification
16
+ review_indicator
17
+ authoritative_source
18
+ authoritative_source_similarity
19
+ lineage_source
20
+ lineage_source_similarity
21
+ date_accepted
22
+ date_amended
23
+ review_date
24
+ review_status
25
+ review_type
26
+ review_decision
27
+ review_decision_date
28
+ review_decision_event
29
+ review_decision_notes
30
+ release
31
+ ).freeze
2
32
 
3
- class Term
4
-
5
- ATTRIBS = %i(
6
- id term abbrev synonyms alt definition
7
- country_code
8
- language_code
9
- notes examples
10
- entry_status
11
- classification
12
- review_indicator
13
- authoritative_source
14
- authoritative_source_similarity
15
- lineage_source
16
- lineage_source_similarity
17
- date_accepted
18
- date_amended
19
- review_date
20
- review_status
21
- review_type
22
- review_decision
23
- review_decision_date
24
- review_decision_event
25
- review_decision_notes
26
- release
27
- )
28
-
29
- attr_accessor *ATTRIBS
30
-
31
- def initialize(options={})
32
- @examples = []
33
- @notes = []
34
-
35
- # puts "options #{options.inspect}"
36
-
37
- options.each_pair do |k, v|
38
- v = v.strip if v.is_a?(String)
39
- next unless v
40
- case k
41
- when /^example/
42
- add_example(v)
43
- when /^note/
44
- add_note(v)
45
- else
46
- # puts"Key #{k}"
47
- key = k.gsub("-", "_")
48
- self.send("#{key}=", v)
33
+ OUTPUT_ATTRIBS = INPUT_ATTRIBS - %i(term alt abbrev synonyms classification) + %i(terms)
34
+
35
+ attr_accessor *(INPUT_ATTRIBS | OUTPUT_ATTRIBS)
36
+
37
+ def initialize(options = {})
38
+ @examples = []
39
+ @notes = []
40
+ @definition = []
41
+
42
+ # puts "options #{options.inspect}"
43
+
44
+ options.each_pair do |k, v|
45
+ v = v.strip if v.is_a?(String)
46
+ next unless v
47
+
48
+ case k
49
+ when /^example/
50
+ add_example(v)
51
+ when /^note/
52
+ add_note(v)
53
+ else
54
+ # puts"Key #{k}"
55
+ key = k.gsub("-", "_")
56
+ send("#{key}=", v)
57
+ end
49
58
  end
59
+ self
50
60
  end
51
- self
52
- end
53
61
 
54
- STRIP_PUNCTUATION = [
55
- ":",
56
- ":",
57
- ".",
58
- "–",
59
- "\-"
60
- ]
61
-
62
- # WARNING
63
- # Always put the longer Regexp match in front!
64
- EXAMPLE_PREFIXES = {
65
- # TODO: fix this, we should not have "EXAMPLES"
66
- eng: ["EXAMPLES", "EXAMPLE"],
67
- ara: "مثال",
68
- chi: "示例",
69
- dan: "EKSEMPEL",
70
- dut: ["VOORBEELD", "VOORBEELDEN"],
71
- fin: "ESIM",
72
- fre: "Exemple",
73
- # ger: "",
74
- jpn: "例",
75
- kor: "보기",
76
- pol: "PRZYKŁAD",
77
- may: "Contoh",
78
- rus: "Пример",
79
- spa: "Ejemplo",
80
- swe: "Exempel"
81
- }
82
-
83
- # WARNING
84
- # Always put the longer Regexp match in front!
85
- NOTE_PREFIXES = {
86
- eng: ["Note \\d to entry", "NOTE"],
87
- ara: "ملاحظة",
88
- chi: "注",
89
- dan: "Note",
90
- dut: "OPMERKING",
91
- fin: "HUOM\\.?", # Matches "HUOM", "HUOM.", "HUOM 1." and "HUOM. 1." (numeral added by the method)
92
- fre: "A noter",
93
- # ger: "",
94
- jpn: "備考",
95
- kor: "비고",
96
- pol: "UWAGA",
97
- may: "catatan",
98
- rus: "нота",
99
- spa: "Nota",
100
- swe: ["Anm. \\d till termpost", "Anm. \\d till terpost", "Anm."]
101
- }
102
-
103
- # To match Chinese and Japanese numerals
104
- ALL_FULL_HALF_WIDTH_NUMBERS = "[0-90-9]"
105
-
106
- def add_example(example)
107
- c = clean_prefixed_string(example, EXAMPLE_PREFIXES)
108
- @examples << c unless c.empty?
109
- end
62
+ STRIP_PUNCTUATION = [
63
+ ":",
64
+ ":",
65
+ ".",
66
+ "–",
67
+ "\-",
68
+ ].freeze
110
69
 
111
- def add_note(note)
112
- c = clean_prefixed_string(note, NOTE_PREFIXES)
113
- @notes << c unless c.empty?
114
- end
70
+ # WARNING
71
+ # Always put the longer Regexp match in front!
72
+ EXAMPLE_PREFIXES = {
73
+ # TODO: fix this, we should not have "EXAMPLES"
74
+ eng: ["EXAMPLES", "EXAMPLE"],
75
+ ara: "مثال",
76
+ chi: "示例",
77
+ dan: "EKSEMPEL",
78
+ dut: ["VOORBEELD", "VOORBEELDEN"],
79
+ fin: "ESIM",
80
+ fre: "Exemple",
81
+ # ger: "",
82
+ jpn: "例",
83
+ kor: "보기",
84
+ pol: "PRZYKŁAD",
85
+ may: "Contoh",
86
+ rus: "Пример",
87
+ spa: "Ejemplo",
88
+ swe: "Exempel",
89
+ }.freeze
115
90
 
116
- def clean_prefixed_string(string, criterion_map)
117
- carry = string.strip
118
- criterion_map.values.flatten.each do |mat|
119
- # puts "example string: #{carry}, mat: #{mat}"
120
-
121
- # puts "note string: #{carry}, mat: #{mat}"
122
- # if @id == 318 and mat == "Nota" and string == "NOTA 1 Una operación tiene un nombre y una lista de parámetros."
123
- # require "pry"
124
- # binding.pry
125
- # end
126
-
127
- # Arabic notes/examples sometimes use parentheses around numbers
128
- carry = carry.sub(
129
- Regexp.new(
130
- "^#{mat}\s*[#{STRIP_PUNCTUATION.join('')}]?" +
131
- "\s*\\(?#{ALL_FULL_HALF_WIDTH_NUMBERS}*\\)?\s*"+
132
- "[#{STRIP_PUNCTUATION.join('')}]?\s*",
133
- Regexp::IGNORECASE
134
- ),
135
- '')
136
- end
137
-
138
- carry
139
- end
91
+ # WARNING
92
+ # Always put the longer Regexp match in front!
93
+ NOTE_PREFIXES = {
94
+ eng: ["Note \\d to entry", "NOTE"],
95
+ ara: "ملاحظة",
96
+ chi: "注",
97
+ dan: "Note",
98
+ dut: "OPMERKING",
99
+ # Matches "HUOM", "HUOM.", "HUOM 1." and "HUOM. 1."
100
+ # (numeral added by the method)
101
+ fin: "HUOM\\.?",
102
+ fre: "A noter",
103
+ # ger: "",
104
+ jpn: "備考",
105
+ kor: "비고",
106
+ pol: "UWAGA",
107
+ may: "catatan",
108
+ rus: "нота",
109
+ spa: "Nota",
110
+ swe: ["Anm. \\d till termpost", "Anm. \\d till terpost", "Anm."],
111
+ }.freeze
140
112
 
113
+ # To match Chinese and Japanese numerals
114
+ ALL_FULL_HALF_WIDTH_NUMBERS = "[0-90-9]".freeze
141
115
 
142
- # The termid should ALWAYS be an integer.
143
- # https://github.com/riboseinc/tc211-termbase/issues/1
144
- def id=(newid)
145
- @id = Integer(newid)
146
- end
116
+ SOURCE_STATUSES = {
117
+ 1 => "identical",
118
+ 2 => "restyle",
119
+ 3 => "context_added",
120
+ 4 => "generalisation",
121
+ 5 => "specialisation",
122
+ 6 => "unspecified",
123
+ }.freeze
147
124
 
148
- def to_hash
149
- ATTRIBS.inject({}) do |acc, attrib|
150
- value = self.send(attrib)
151
- unless value.nil?
152
- acc.merge(attrib.to_s => value)
153
- else
154
- acc
125
+ def add_example(example)
126
+ c = clean_prefixed_string(example, EXAMPLE_PREFIXES)
127
+ @examples << c unless c.empty?
128
+ end
129
+
130
+ def add_note(note)
131
+ c = clean_prefixed_string(note, NOTE_PREFIXES)
132
+ @notes << c unless c.empty?
133
+ end
134
+
135
+ def clean_prefixed_string(string, criterion_map)
136
+ carry = string.to_s.strip
137
+ criterion_map.values.flatten.each do |mat|
138
+ # Arabic notes/examples sometimes use parentheses around numbers
139
+ carry = carry.sub(carry_regex(mat), "")
155
140
  end
141
+
142
+ carry
156
143
  end
157
- end
158
144
 
159
- # entry-status
160
- ## Must be one of notValid valid superseded retired
161
- def entry_status=(value)
162
- case value
163
- when "有效的", "käytössä", "действующий", "válido"
164
- value = "valid"
165
- when "korvattu", "reemplazado"
166
- value = "superseded"
167
- when "информация отсутствует" # "information absent"!?
168
- value = "retired"
169
- when %w(notValid valid superseded retired)
170
- # do nothing
171
- end
172
- @entry_status = value
173
- end
145
+ def carry_regex(mat)
146
+ Regexp.new(
147
+ [
148
+ "^#{mat}\s*[#{STRIP_PUNCTUATION.join}]?",
149
+ "\s*\\(?#{ALL_FULL_HALF_WIDTH_NUMBERS}*\\)?\s*",
150
+ "[#{STRIP_PUNCTUATION.join}]?\s*",
151
+ ].join,
152
+ )
153
+ end
174
154
 
175
- # classification
176
- ## Must be one of the following: preferred admitted deprecated
177
- def classification=(value)
178
- case value
179
- when ""
180
- value = "admitted"
181
- when "认可的", "допустимый", "admitido"
182
- value = "admitted"
183
- when "首选的", "suositettava", "suositeltava", "рекомендуемый", "preferente"
184
- value = "preferred"
185
- when %w(preferred admitted deprecated)
186
- # do nothing
187
- end
188
- @classification = value
189
- end
155
+ # The termid should ALWAYS be an integer.
156
+ # https://github.com/riboseinc/tc211-termbase/issues/1
157
+ def id=(newid)
158
+ @id = Integer(newid)
159
+ end
190
160
 
191
- # review-indicator
192
- ## Must be one of the following <empty field> Under Review in Source Document",
193
- def review_indicator=(value)
194
- unless ["", "Under Review in Source Document"].include?(value)
195
- value = ""
161
+ def definition=(definition)
162
+ @definition << definition
196
163
  end
197
- @review_indicator = value
198
- end
199
164
 
200
- # authoritative-source-similarity
201
- # ## Must be one of the following codes: identical = 1 restyled = 2 context added = 3 generalisation = 4 specialisation = 5 unspecified = 6",
202
- def authoritative_source_similarity=(value)
203
- unless (1..6).include?(value)
204
- value = 6
165
+ def to_hash
166
+ OUTPUT_ATTRIBS.inject({}) do |acc, attrib|
167
+ value = send(attrib)
168
+ if value.nil?
169
+ acc
170
+ else
171
+ acc.merge(attrib.to_s => value)
172
+ end
173
+ end
205
174
  end
206
- @authoritative_source_similarity = value
207
- end
208
175
 
209
- # lineage-source-similarity
210
- # ## Must be one of the following codes: identical = 1 restyled = 2 context added = 3 generalisation = 4 specialisation = 5 unspecified = 6",
211
- def authoritative_source_similarity=(value)
212
- unless (1..6).include?(value)
213
- value = 6
176
+ # entry-status
177
+ ## Must be one of: notValid, valid, superseded, retired
178
+ def entry_status=(value)
179
+ case value
180
+ when "有效的", "käytössä", "действующий", "válido"
181
+ value = "valid"
182
+ when "korvattu", "reemplazado"
183
+ value = "superseded"
184
+ when "информация отсутствует" # "information absent"!?
185
+ value = "retired"
186
+ when %w(notValid valid superseded retired) # do nothing
187
+ end
188
+ @entry_status = value
214
189
  end
215
- @authoritative_source_similarity
216
- end
217
190
 
218
- def review_status=(value) ## Must be one of pending tentative final
219
- unless ["", "pending", "tentative", "final"].include?(value)
220
- value = ""
191
+ # classification
192
+ ## Must be one of the following: preferred, admitted, deprecated
193
+ def classification=(value)
194
+ case value
195
+ when "", "认可的", "допустимый", "admitido", "adminitido"
196
+ value = "admitted"
197
+ when "首选的", "suositettava", "suositeltava", "рекомендуемый", "preferente"
198
+ value = "preferred"
199
+ when %w(preferred admitted deprecated)
200
+ # do nothing
201
+ end
202
+ @classification = value
221
203
  end
222
- @review_status = value
223
- end
224
204
 
225
- def review_type=(value) ## Must be one of supersession, retirement
226
- unless ["", "supersession", "retirement"].include?(value)
227
- value = ""
205
+ # review-indicator
206
+ # Must be one of the following
207
+ # <empty field>
208
+ # Under Review in Source Document
209
+ def review_indicator=(value)
210
+ unless ["", "Under Review in Source Document"].include?(value)
211
+ value = ""
212
+ end
213
+ @review_indicator = value
228
214
  end
229
- @review_type = value
230
- end
231
215
 
232
- def review_decision=(value) ## Must be one of withdrawn, accepted notAccepted
233
- unless ["", "withdrawn", "accepted", "notAccepted"].include?(value)
234
- value = ""
216
+ # authoritative-source-similarity
217
+ # Must be one of the following codes:
218
+ # identical = 1
219
+ # restyled = 2
220
+ # context added = 3
221
+ # generalisation = 4
222
+ # specialisation = 5
223
+ # unspecified = 6
224
+ def authoritative_source_similarity=(value)
225
+ unless SOURCE_STATUSES.key?(value)
226
+ value = 6
227
+ end
228
+ @authoritative_source_similarity = value
235
229
  end
236
- @review_decision = value
237
- end
238
230
 
239
- def retired?
240
- release >= 0
241
- end
231
+ # lineage-source-similarity
232
+ # Must be one of the following codes:
233
+ # identical = 1
234
+ # restyled = 2
235
+ # context added = 3
236
+ # generalisation = 4
237
+ # specialisation = 5
238
+ # unspecified = 6
239
+ def lineage_source_similarity=(value)
240
+ unless SOURCE_STATUSES.key?(value)
241
+ value = 6
242
+ end
243
+ @lineage_source_similarity = value
244
+ end
242
245
 
243
- end
246
+ ## value must be one of: pending, tentative, final
247
+ def review_status=(value)
248
+ unless ["", "pending", "tentative", "final"].include?(value)
249
+ value = ""
250
+ end
251
+ @review_status = value
252
+ end
244
253
 
245
- end
254
+ ## value must be one of: supersession, retirement
255
+ def review_type=(value)
256
+ unless ["", "supersession", "retirement"].include?(value)
257
+ value = ""
258
+ end
259
+ @review_type = value
260
+ end
261
+
262
+ ## value must be one of: withdrawn, accepted, notAccepted
263
+ def review_decision=(value)
264
+ unless ["", "withdrawn", "accepted", "notAccepted"].include?(value)
265
+ value = ""
266
+ end
267
+ @review_decision = value
268
+ end
269
+
270
+ def retired?
271
+ release >= 0
272
+ end
273
+
274
+ def terms
275
+ [
276
+ primary_term_hash,
277
+ alt_term_hash,
278
+ abbreviation_term_hash,
279
+ synonyms_term_hash,
280
+ ].compact
281
+ end
282
+
283
+ def primary_term_hash
284
+ return unless term
285
+
286
+ {
287
+ "type" => "expression",
288
+ "designation" => term,
289
+ "normative_status" => classification,
290
+ }
291
+ end
292
+
293
+ def alt_term_hash
294
+ return unless alt
295
+
296
+ {
297
+ "type" => "expression",
298
+ "designation" => alt,
299
+ "normative_status" => classification,
300
+ }
301
+ end
302
+
303
+ def abbreviation_term_hash
304
+ return unless abbrev
305
+
306
+ {
307
+ "type" => "abbreviation",
308
+ "designation" => abbrev,
309
+ }
310
+ end
311
+
312
+ def synonyms_term_hash
313
+ return unless synonyms
314
+
315
+ {
316
+ "type" => "expression",
317
+ "designation" => synonyms,
318
+ }
319
+ end
320
+
321
+ def sources_hash
322
+ [
323
+ authoritative_source_hash,
324
+ lineage_source_hash,
325
+ ].compact
326
+ end
327
+
328
+ def authoritative_source_hash
329
+ return unless authoritative_source
330
+
331
+ {
332
+ origin: {
333
+ link: authoritative_source["link"],
334
+ ref: authoritative_source["ref"],
335
+ clause: authoritative_source["clause"],
336
+ },
337
+ type: "authoritative",
338
+ status: SOURCE_STATUSES[authoritative_source_similarity],
339
+ }
340
+ end
341
+
342
+ def lineage_source_hash
343
+ return unless lineage_source
344
+
345
+ {
346
+ origin: {
347
+ ref: lineage_source,
348
+ },
349
+ type: "lineage",
350
+ status: SOURCE_STATUSES[lineage_source_similarity],
351
+ }
352
+ end
353
+
354
+ def to_localized_concept_hash
355
+ localized_concept_hash = to_hash
356
+
357
+ %w[
358
+ review_status
359
+ review_decision
360
+ review_decision_notes
361
+ review_indicator
362
+ authoritative_source
363
+ authoritative_source_similarity
364
+ lineage_source
365
+ lineage_source_similarity
366
+ country_code
367
+ ].each do |key|
368
+ localized_concept_hash.delete(key)
369
+ end
370
+
371
+ localized_concept_hash["id"] = localized_concept_hash["id"].to_s
372
+ localized_concept_hash["sources"] = sources_hash
373
+
374
+ localized_concept_hash
375
+ end
376
+ end
377
+ end
@@ -6,48 +6,47 @@ require_relative "information_sheet"
6
6
  require_relative "terminology_sheet"
7
7
 
8
8
  module Tc211::Termbase
9
+ class TermWorkbook
10
+ attr_accessor :workbook, :glossary_info, :languages, :filename
11
+
12
+ SPECIAL_SHEETS = [
13
+ "Glossary Information",
14
+ "Character Encoding Spreadsheet",
15
+ ].freeze
16
+
17
+ def initialize(filepath)
18
+ @filename = filepath
19
+ @workbook = Creek::Book.new(filepath)
20
+ @glossary_info = InformationSheet.new(
21
+ find_sheet_by_name("Glossary Information"),
22
+ )
23
+ @languages = languages_supported
24
+ self
25
+ end
9
26
 
10
- class TermWorkbook
11
- attr_accessor :workbook
12
- attr_accessor :glossary_info
13
- attr_accessor :languages
14
- attr_accessor :filename
15
-
16
- SPECIAL_SHEETS = [
17
- "Glossary Information",
18
- "Character Encoding Spreadsheet"
19
- ]
20
-
21
- def initialize(filepath)
22
- @filename = filepath
23
- @workbook = Creek::Book.new(filepath)
24
- @glossary_info = InformationSheet.new(find_sheet_by_name("Glossary Information"))
25
- @languages = languages_supported
26
- self
27
- end
28
-
29
- def languages_supported
30
- @workbook.sheets.map(&:name).reject! do |name|
31
- SPECIAL_SHEETS.include?(name)
27
+ def languages_supported
28
+ @workbook.sheets.map(&:name).reject! do |name|
29
+ SPECIAL_SHEETS.include?(name)
30
+ end
32
31
  end
33
- end
34
32
 
35
- def language_sheet(lang)
36
- raise unless @languages.include?(lang)
37
- TerminologySheet.new(find_sheet_by_name(lang))
38
- end
33
+ def language_sheet(lang)
34
+ raise unless @languages.include?(lang)
39
35
 
40
- def find_sheet_by_name(sheet_name)
41
- @workbook.sheets.detect do |sheet|
42
- sheet.name == sheet_name
36
+ TerminologySheet.new(find_sheet_by_name(lang))
37
+ end
38
+
39
+ def find_sheet_by_name(sheet_name)
40
+ @workbook.sheets.detect do |sheet|
41
+ sheet.name == sheet_name
42
+ end
43
43
  end
44
- end
45
44
 
46
- def write_glossary_info
47
- glossary_info_fn = Pathname.new(@filename).sub_ext(".yaml")
48
- File.open(glossary_info_fn,"w") do |file|
49
- file.write(glossary_info.to_yaml)
45
+ def write_glossary_info
46
+ glossary_info_fn = Pathname.new(@filename).sub_ext(".yaml")
47
+ File.open(glossary_info_fn, "w") do |file|
48
+ file.write(glossary_info.to_yaml)
49
+ end
50
50
  end
51
51
  end
52
52
  end
53
- end