tc211-termbase 0.1.13 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,245 +1,377 @@
1
1
  module Tc211::Termbase
2
+ class Term
3
# Attribute names accepted as input keys for a Term, in declaration order.
INPUT_ATTRIBS = %i[
  id term abbrev synonyms alt definition
  country_code language_code
  notes examples
  entry_status classification review_indicator
  authoritative_source authoritative_source_similarity
  lineage_source lineage_source_similarity
  date_accepted date_amended
  review_date review_status review_type
  review_decision review_decision_date
  review_decision_event review_decision_notes
  release
].freeze
2
32
 
3
- class Term
4
-
5
- ATTRIBS = %i(
6
- id term abbrev synonyms alt definition
7
- country_code
8
- language_code
9
- notes examples
10
- entry_status
11
- classification
12
- review_indicator
13
- authoritative_source
14
- authoritative_source_similarity
15
- lineage_source
16
- lineage_source_similarity
17
- date_accepted
18
- date_amended
19
- review_date
20
- review_status
21
- review_type
22
- review_decision
23
- review_decision_date
24
- review_decision_event
25
- review_decision_notes
26
- release
27
- )
28
-
29
- attr_accessor *ATTRIBS
30
-
31
- def initialize(options={})
32
- @examples = []
33
- @notes = []
34
-
35
- # puts "options #{options.inspect}"
36
-
37
- options.each_pair do |k, v|
38
- v = v.strip if v.is_a?(String)
39
- next unless v
40
- case k
41
- when /^example/
42
- add_example(v)
43
- when /^note/
44
- add_note(v)
45
- else
46
- # puts"Key #{k}"
47
- key = k.gsub("-", "_")
48
- self.send("#{key}=", v)
33
# Attributes serialised by #to_hash: the input attributes minus the
# individual designation fields, plus the combined :terms entry.
# Frozen like the other constants of this class so it cannot be mutated.
OUTPUT_ATTRIBS = (
  INPUT_ATTRIBS - %i[term alt abbrev synonyms classification] + %i[terms]
).freeze

# Generates a reader and writer for every input and output attribute.
# Explicit parentheses avoid Ruby's "`*' interpreted as argument prefix"
# warning. Methods defined explicitly later in the class replace the
# generated ones.
attr_accessor(*(INPUT_ATTRIBS | OUTPUT_ATTRIBS))
36
+
37
# Builds a term from a hash of raw attribute values.
#
# String values are stripped and nil/false values are skipped. Keys starting
# with "example" or "note" are routed to #add_example / #add_note; any other
# key has its dashes turned into underscores and is passed to the matching
# writer method.
#
# @param options [Hash] attribute name (String or Symbol) => raw value
# @raise [NoMethodError] if a key does not correspond to a writer method
def initialize(options = {})
  @examples = []
  @notes = []
  @definition = []

  options.each_pair do |key, value|
    value = value.strip if value.is_a?(String)
    next unless value

    # Normalise the key once so Symbol keys work as well as String keys.
    name = key.to_s

    case name
    when /^example/
      add_example(value)
    when /^note/
      add_note(value)
    else
      writer = "#{name.tr('-', '_')}="
      send(writer, value)
    end
  end
  self
end
51
- self
52
- end
53
61
 
54
- STRIP_PUNCTUATION = [
55
- ":",
56
- ":",
57
- ".",
58
- "–",
59
- "\-"
60
- ]
61
-
62
- # WARNING
63
- # Always put the longer Regexp match in front!
64
- EXAMPLE_PREFIXES = {
65
- # TODO: fix this, we should not have "EXAMPLES"
66
- eng: ["EXAMPLES", "EXAMPLE"],
67
- ara: "مثال",
68
- chi: "示例",
69
- dan: "EKSEMPEL",
70
- dut: ["VOORBEELD", "VOORBEELDEN"],
71
- fin: "ESIM",
72
- fre: "Exemple",
73
- # ger: "",
74
- jpn: "例",
75
- kor: "보기",
76
- pol: "PRZYKŁAD",
77
- may: "Contoh",
78
- rus: "Пример",
79
- spa: "Ejemplo",
80
- swe: "Exempel"
81
- }
82
-
83
- # WARNING
84
- # Always put the longer Regexp match in front!
85
- NOTE_PREFIXES = {
86
- eng: ["Note \\d to entry", "NOTE"],
87
- ara: "ملاحظة",
88
- chi: "注",
89
- dan: "Note",
90
- dut: "OPMERKING",
91
- fin: "HUOM\\.?", # Matches "HUOM", "HUOM.", "HUOM 1." and "HUOM. 1." (numeral added by the method)
92
- fre: "A noter",
93
- # ger: "",
94
- jpn: "備考",
95
- kor: "비고",
96
- pol: "UWAGA",
97
- may: "catatan",
98
- rus: "нота",
99
- spa: "Nota",
100
- swe: ["Anm. \\d till termpost", "Anm. \\d till terpost", "Anm."]
101
- }
102
-
103
- # To match Chinese and Japanese numerals
104
- ALL_FULL_HALF_WIDTH_NUMBERS = "[0-90-9]"
105
-
106
- def add_example(example)
107
- c = clean_prefixed_string(example, EXAMPLE_PREFIXES)
108
- @examples << c unless c.empty?
109
- end
62
# Punctuation characters that may follow a note/example prefix or its number.
# The list is joined into a regexp character class, so the hyphen stays last.
# The full-width colon is written as an escape so that text normalisation
# cannot silently fold it into a second ASCII colon.
STRIP_PUNCTUATION = [
  ":",
  "\uFF1A", # full-width colon
  ".",
  "–",
  "-",
].freeze
110
69
 
111
- def add_note(note)
112
- c = clean_prefixed_string(note, NOTE_PREFIXES)
113
- @notes << c unless c.empty?
114
- end
70
+ # WARNING
71
+ # Always put the longer Regexp match in front!
72
+ EXAMPLE_PREFIXES = {
73
+ # TODO: fix this, we should not have "EXAMPLES"
74
+ eng: ["EXAMPLES", "EXAMPLE"],
75
+ ara: "مثال",
76
+ chi: "示例",
77
+ dan: "EKSEMPEL",
78
+ dut: ["VOORBEELD", "VOORBEELDEN"],
79
+ fin: "ESIM",
80
+ fre: "Exemple",
81
+ # ger: "",
82
+ jpn: "例",
83
+ kor: "보기",
84
+ pol: "PRZYKŁAD",
85
+ may: "Contoh",
86
+ rus: "Пример",
87
+ spa: "Ejemplo",
88
+ swe: "Exempel",
89
+ }.freeze
115
90
 
116
- def clean_prefixed_string(string, criterion_map)
117
- carry = string.strip
118
- criterion_map.values.flatten.each do |mat|
119
- # puts "example string: #{carry}, mat: #{mat}"
120
-
121
- # puts "note string: #{carry}, mat: #{mat}"
122
- # if @id == 318 and mat == "Nota" and string == "NOTA 1 Una operación tiene un nombre y una lista de parámetros."
123
- # require "pry"
124
- # binding.pry
125
- # end
126
-
127
- # Arabic notes/examples sometimes use parantheses around numbers
128
- carry = carry.sub(
129
- Regexp.new(
130
- "^#{mat}\s*[#{STRIP_PUNCTUATION.join('')}]?" +
131
- "\s*\\(?#{ALL_FULL_HALF_WIDTH_NUMBERS}*\\)?\s*"+
132
- "[#{STRIP_PUNCTUATION.join('')}]?\s*",
133
- Regexp::IGNORECASE
134
- ),
135
- '')
136
- end
137
-
138
- carry
139
- end
91
+ # WARNING
92
+ # Always put the longer Regexp match in front!
93
+ NOTE_PREFIXES = {
94
+ eng: ["Note \\d to entry", "NOTE"],
95
+ ara: "ملاحظة",
96
+ chi: "注",
97
+ dan: "Note",
98
+ dut: "OPMERKING",
99
+ # Matches "HUOM", "HUOM.", "HUOM 1." and "HUOM. 1."
100
+ # (numeral added by the method)
101
+ fin: "HUOM\\.?",
102
+ fre: "A noter",
103
+ # ger: "",
104
+ jpn: "備考",
105
+ kor: "비고",
106
+ pol: "UWAGA",
107
+ may: "catatan",
108
+ rus: "нота",
109
+ spa: "Nota",
110
+ swe: ["Anm. \\d till termpost", "Anm. \\d till terpost", "Anm."],
111
+ }.freeze
140
112
 
113
# Regexp character class matching one digit, either ASCII (half-width) or
# full-width (U+FF10..U+FF19) as used for numbering in Chinese and Japanese
# text. The full-width range is written with escapes so it cannot be
# normalised into a duplicate of the ASCII range.
ALL_FULL_HALF_WIDTH_NUMBERS = "[0-9\uFF10-\uFF19]".freeze
141
115
 
142
- # The termid should ALWAYS be an integer.
143
- # https://github.com/riboseinc/tc211-termbase/issues/1
144
- def id=(newid)
145
- @id = Integer(newid)
146
- end
116
# Maps the numeric similarity codes 1..6 to the status names emitted in
# source hashes.
SOURCE_STATUSES = (1..6).zip(
  %w[
    identical
    restyle
    context_added
    generalisation
    specialisation
    unspecified
  ],
).to_h.freeze
147
124
 
148
- def to_hash
149
- ATTRIBS.inject({}) do |acc, attrib|
150
- value = self.send(attrib)
151
- unless value.nil?
152
- acc.merge(attrib.to_s => value)
153
- else
154
- acc
125
# Strips any known example prefix from +example+ and stores the remainder.
# Nothing is stored when the cleaned text is empty.
#
# @param example [String] raw example text
def add_example(example)
  cleaned = clean_prefixed_string(example, EXAMPLE_PREFIXES)
  return if cleaned.empty?

  @examples << cleaned
end
129
+
130
# Strips any known note prefix from +note+ and stores the remainder.
# Nothing is stored when the cleaned text is empty.
#
# @param note [String] raw note text
def add_note(note)
  cleaned = clean_prefixed_string(note, NOTE_PREFIXES)
  return if cleaned.empty?

  @notes << cleaned
end
134
+
135
# Removes every prefix listed in +criterion_map+ from the start of +string+.
#
# The input is converted to a String and stripped, then each prefix (hash
# values, flattened, in order) is removed via the pattern from #carry_regex.
# Arabic notes/examples sometimes use parentheses around numbers; the
# pattern built by #carry_regex accounts for that.
#
# @param string [#to_s] raw text
# @param criterion_map [Hash] language => prefix or list of prefixes
# @return [String] the text without its prefix
def clean_prefixed_string(string, criterion_map)
  prefixes = criterion_map.values.flatten

  prefixes.reduce(string.to_s.strip) do |remaining, prefix|
    remaining.sub(carry_regex(prefix), "")
  end
end
157
- end
158
144
 
159
- # entry-status
160
- ## Must be one of notValid valid superseded retired
161
- def entry_status=(value)
162
- case value
163
- when "有效的", "käytössä", "действующий", "válido"
164
- value = "valid"
165
- when "korvattu", "reemplazado"
166
- value = "superseded"
167
- when "информация отсутствует" # "information absent"!?
168
- value = "retired"
169
- when %w(notValid valid superseded retired)
170
- # do nothing
171
- end
172
- @entry_status = value
173
- end
145
# Builds the pattern that strips a note/example prefix from the start of a
# string: the prefix, optional punctuation, an optional (possibly
# parenthesised) number, and optional trailing punctuation, with whitespace
# allowed between the parts.
#
# - Matching is case-insensitive because the prefix tables and the source
#   text differ in case (e.g. "Nota" vs "NOTA 1 ...").
# - "\\s" is written with a doubled backslash: in a double-quoted Ruby
#   string a single "\s" is a literal space, which would not match tabs.
# - "\\A" anchors at the start of the string; "^" would also match at the
#   start of a later line of multi-line text.
#
# @param mat [String] prefix as a regexp fragment (may contain regexp syntax)
# @return [Regexp]
def carry_regex(mat)
  # Escaped so the character class does not depend on the list order.
  punctuation = "[#{Regexp.escape(STRIP_PUNCTUATION.join)}]?"

  Regexp.new(
    [
      "\\A#{mat}\\s*#{punctuation}",
      "\\s*\\(?#{ALL_FULL_HALF_WIDTH_NUMBERS}*\\)?\\s*",
      "#{punctuation}\\s*",
    ].join,
    Regexp::IGNORECASE,
  )
end
174
154
 
175
- # classification
176
- ## Must be one of the following: preferred admitted deprecated
177
- def classification=(value)
178
- case value
179
- when ""
180
- value = "admitted"
181
- when "认可的", "допустимый", "admitido"
182
- value = "admitted"
183
- when "首选的", "suositettava", "suositeltava", "рекомендуемый", "preferente"
184
- value = "preferred"
185
- when %w(preferred admitted deprecated)
186
- # do nothing
187
- end
188
- @classification = value
189
- end
155
# The termid should ALWAYS be an integer.
# https://github.com/riboseinc/tc211-termbase/issues/1
#
# String input is parsed explicitly as base 10 so that a leading zero is not
# read as an octal prefix ("010" is 10, not 8).
#
# @param newid [String, Numeric] raw identifier
# @raise [ArgumentError, TypeError] if the value cannot be converted
def id=(newid)
  @id = newid.is_a?(String) ? Integer(newid, 10) : Integer(newid)
end
190
160
 
191
- # review-indicator
192
- ## Must be one of the following <empty field> Under Review in Source Document",
193
- def review_indicator=(value)
194
- unless ["", "Under Review in Source Document"].include?(value)
195
- value = ""
161
# Appends +definition+ to the list of definitions; earlier entries are kept
# rather than replaced.
def definition=(definition)
  @definition.push(definition)
end
197
- @review_indicator = value
198
- end
199
164
 
200
- # authoritative-source-similarity
201
- # ## Must be one of the following codes: identical = 1 restyled = 2 context added = 3 generalisation = 4 specialisation = 5 unspecified = 6",
202
- def authoritative_source_similarity=(value)
203
- unless (1..6).include?(value)
204
- value = 6
165
# Serialises the term into a Hash keyed by attribute name (as String).
# Attributes listed in OUTPUT_ATTRIBS whose value is nil are left out.
#
# @return [Hash{String => Object}]
def to_hash
  OUTPUT_ATTRIBS.each_with_object({}) do |attrib, result|
    value = send(attrib)
    result[attrib.to_s] = value unless value.nil?
  end
end
206
- @authoritative_source_similarity = value
207
- end
208
175
 
209
- # lineage-source-similarity
210
- # ## Must be one of the following codes: identical = 1 restyled = 2 context added = 3 generalisation = 4 specialisation = 5 unspecified = 6",
211
- def authoritative_source_similarity=(value)
212
- unless (1..6).include?(value)
213
- value = 6
176
# Localised entry-status values mapped to their canonical English form.
ENTRY_STATUS_TRANSLATIONS = {
  "有效的" => "valid",
  "käytössä" => "valid",
  "действующий" => "valid",
  "válido" => "valid",
  "korvattu" => "superseded",
  "reemplazado" => "superseded",
  "информация отсутствует" => "retired", # "information absent"!?
}.freeze

# entry-status
# Expected to be one of: notValid valid superseded retired.
#
# Known localised values are translated to the canonical form; every other
# value (including the canonical ones) is stored unchanged. No validation
# is performed, exactly as before: the former `when %w(...)` branch compared
# a String against an Array and could never match.
def entry_status=(value)
  @entry_status = ENTRY_STATUS_TRANSLATIONS.fetch(value, value)
end
215
- @authoritative_source_similarity
216
- end
217
190
 
218
- def review_status=(value) ## Must be one of pending tentative final
219
- unless ["", "pending", "tentative", "final"].include?(value)
220
- value = ""
191
# Localised (or blank/misspelt) classification values mapped to their
# canonical English form.
CLASSIFICATION_TRANSLATIONS = {
  "" => "admitted",
  "认可的" => "admitted",
  "допустимый" => "admitted",
  "admitido" => "admitted",
  "adminitido" => "admitted",
  "首选的" => "preferred",
  "suositettava" => "preferred",
  "suositeltava" => "preferred",
  "рекомендуемый" => "preferred",
  "preferente" => "preferred",
}.freeze

# classification
# Expected to be one of: preferred admitted deprecated.
#
# Known localised values and the empty string are translated; every other
# value (including the canonical ones) is stored unchanged. No validation
# is performed, exactly as before: the former `when %w(...)` branch compared
# a String against an Array and could never match.
def classification=(value)
  @classification = CLASSIFICATION_TRANSLATIONS.fetch(value, value)
end
222
- @review_status = value
223
- end
224
204
 
225
- def review_type=(value) ## Must be one of supersession, retirement
226
- unless ["", "supersession", "retirement"].include?(value)
227
- value = ""
205
# review-indicator
# Accepts either the empty string or "Under Review in Source Document";
# any other value is stored as the empty string.
def review_indicator=(value)
  accepted = ["", "Under Review in Source Document"]
  @review_indicator = accepted.include?(value) ? value : ""
end
229
- @review_type = value
230
- end
231
215
 
232
- def review_decision=(value) ## Must be one of withdrawn, accepted notAccepted
233
- unless ["", "withdrawn", "accepted", "notAccepted"].include?(value)
234
- value = ""
216
# authoritative-source-similarity
# Stores the similarity code when it is a key of SOURCE_STATUSES (1..6);
# any other value is replaced by 6.
def authoritative_source_similarity=(value)
  @authoritative_source_similarity = SOURCE_STATUSES.key?(value) ? value : 6
end
236
- @review_decision = value
237
- end
238
230
 
239
- def retired?
240
- release >= 0
241
- end
231
# lineage-source-similarity
# Stores the similarity code when it is a key of SOURCE_STATUSES (1..6);
# any other value is replaced by 6.
def lineage_source_similarity=(value)
  @lineage_source_similarity = SOURCE_STATUSES.key?(value) ? value : 6
end
242
245
 
243
- end
246
# Stores the review status. Accepted values are "", "pending", "tentative"
# and "final"; any other value is stored as the empty string.
def review_status=(value)
  accepted = ["", "pending", "tentative", "final"]
  @review_status = accepted.include?(value) ? value : ""
end
244
253
 
245
- end
254
# Stores the review type. Accepted values are "", "supersession" and
# "retirement"; any other value is stored as the empty string.
def review_type=(value)
  accepted = ["", "supersession", "retirement"]
  @review_type = accepted.include?(value) ? value : ""
end
261
+
262
# Stores the review decision. Accepted values are "", "withdrawn",
# "accepted" and "notAccepted"; any other value is stored as the empty
# string.
def review_decision=(value)
  accepted = ["", "withdrawn", "accepted", "notAccepted"]
  @review_decision = accepted.include?(value) ? value : ""
end
269
+
270
# Whether +release+ is set and greater than or equal to zero.
#
# A term built without a release value has a nil +release+; that case now
# returns false instead of raising NoMethodError.
#
# @return [Boolean]
def retired?
  return false if release.nil?

  release >= 0
end
273
+
274
# Collects the designation hashes (primary term, alternative, abbreviation,
# synonyms) in that order, leaving out those that are not set.
#
# @return [Array<Hash>]
def terms
  builders = %i[
    primary_term_hash
    alt_term_hash
    abbreviation_term_hash
    synonyms_term_hash
  ]

  builders.map { |builder| send(builder) }.compact
end
282
+
283
# Designation hash for the primary term, or nil when no term is set.
# The term's classification is used as its normative status.
def primary_term_hash
  designation = term
  return unless designation

  {
    "type" => "expression",
    "designation" => designation,
    "normative_status" => classification,
  }
end
292
+
293
# Designation hash for the alternative term, or nil when none is set.
# The classification attribute is used as its normative status.
def alt_term_hash
  designation = alt
  return unless designation

  {
    "type" => "expression",
    "designation" => designation,
    "normative_status" => classification,
  }
end
302
+
303
# Designation hash for the abbreviation, or nil when none is set.
def abbreviation_term_hash
  designation = abbrev
  return unless designation

  { "type" => "abbreviation", "designation" => designation }
end
311
+
312
# Designation hash for the synonyms value, or nil when none is set.
# The value is passed through as-is as the designation.
def synonyms_term_hash
  designation = synonyms
  return unless designation

  { "type" => "expression", "designation" => designation }
end
320
+
321
# Source entries for this term: the authoritative source followed by the
# lineage source, leaving out whichever is not set.
#
# @return [Array<Hash>]
def sources_hash
  authoritative = authoritative_source_hash
  lineage = lineage_source_hash

  [authoritative, lineage].reject(&:nil?)
end
327
+
328
# Source entry for the authoritative source, or nil when none is set.
# Reads the "link", "ref" and "clause" entries of the source via #[] and
# translates the similarity code through SOURCE_STATUSES.
def authoritative_source_hash
  source = authoritative_source
  return unless source

  origin = {
    link: source["link"],
    ref: source["ref"],
    clause: source["clause"],
  }

  {
    origin: origin,
    type: "authoritative",
    status: SOURCE_STATUSES[authoritative_source_similarity],
  }
end
341
+
342
# Source entry for the lineage source, or nil when none is set.
# The lineage source value is used directly as the origin reference and the
# similarity code is translated through SOURCE_STATUSES.
def lineage_source_hash
  reference = lineage_source
  return unless reference

  {
    origin: { ref: reference },
    type: "lineage",
    status: SOURCE_STATUSES[lineage_source_similarity],
  }
end
353
+
354
# Serialises the term as a localized-concept hash: the #to_hash output
# without the review, source and country keys, with "id" converted to a
# String and the combined "sources" list added.
#
# @return [Hash{String => Object}]
def to_localized_concept_hash
  omitted_keys = %w[
    review_status
    review_decision
    review_decision_notes
    review_indicator
    authoritative_source
    authoritative_source_similarity
    lineage_source
    lineage_source_similarity
    country_code
  ]

  concept = to_hash.reject { |key, _value| omitted_keys.include?(key) }
  concept["id"] = concept["id"].to_s
  concept["sources"] = sources_hash
  concept
end
376
+ end
377
+ end
@@ -6,48 +6,47 @@ require_relative "information_sheet"
6
6
  require_relative "terminology_sheet"
7
7
 
8
8
  module Tc211::Termbase
9
+ class TermWorkbook
10
+ attr_accessor :workbook, :glossary_info, :languages, :filename
11
+
12
# Names of workbook sheets that are not language terminology sheets.
SPECIAL_SHEETS = ["Glossary Information", "Character Encoding Spreadsheet"].freeze
16
+
17
# Opens the workbook at +filepath+, wraps its "Glossary Information" sheet
# in an InformationSheet and records the list of language sheets.
#
# @param filepath [String] path handed to Creek::Book
def initialize(filepath)
  @filename = filepath
  @workbook = Creek::Book.new(filepath)

  info_sheet = find_sheet_by_name("Glossary Information")
  @glossary_info = InformationSheet.new(info_sheet)

  @languages = languages_supported
  self
end
9
26
 
10
- class TermWorkbook
11
- attr_accessor :workbook
12
- attr_accessor :glossary_info
13
- attr_accessor :languages
14
- attr_accessor :filename
15
-
16
- SPECIAL_SHEETS = [
17
- "Glossary Information",
18
- "Character Encoding Spreadsheet"
19
- ]
20
-
21
- def initialize(filepath)
22
- @filename = filepath
23
- @workbook = Creek::Book.new(filepath)
24
- @glossary_info = InformationSheet.new(find_sheet_by_name("Glossary Information"))
25
- @languages = languages_supported
26
- self
27
- end
28
-
29
- def languages_supported
30
- @workbook.sheets.map(&:name).reject! do |name|
31
- SPECIAL_SHEETS.include?(name)
27
# Names of all sheets in the workbook except those in SPECIAL_SHEETS.
#
# Uses the non-destructive #reject: #reject! returns nil when nothing was
# removed, which left the language list nil for workbooks that contain no
# special sheets.
#
# @return [Array<String>]
def languages_supported
  @workbook.sheets.map(&:name).reject do |name|
    SPECIAL_SHEETS.include?(name)
  end
end
33
- end
34
32
 
35
- def language_sheet(lang)
36
- raise unless @languages.include?(lang)
37
- TerminologySheet.new(find_sheet_by_name(lang))
38
- end
33
# Returns the terminology sheet for +lang+.
#
# @param lang [String] sheet name; must be one of the recorded languages
# @return [TerminologySheet]
# @raise [RuntimeError] if +lang+ is not among the recorded languages
def language_sheet(lang)
  # Array() keeps the membership check safe when no language list is set.
  unless Array(@languages).include?(lang)
    raise "No terminology sheet for language #{lang.inspect}"
  end

  TerminologySheet.new(find_sheet_by_name(lang))
end
38
+
39
# Returns the first workbook sheet whose name equals +sheet_name+, or nil
# when there is none.
def find_sheet_by_name(sheet_name)
  @workbook.sheets.find { |candidate| candidate.name == sheet_name }
end
44
- end
45
44
 
46
- def write_glossary_info
47
- glossary_info_fn = Pathname.new(@filename).sub_ext(".yaml")
48
- File.open(glossary_info_fn,"w") do |file|
49
- file.write(glossary_info.to_yaml)
45
# Writes the glossary information as YAML next to the workbook, using the
# workbook's path with its extension replaced by ".yaml".
def write_glossary_info
  target = Pathname.new(@filename).sub_ext(".yaml")

  File.open(target, "w") { |io| io.write(glossary_info.to_yaml) }
end
51
51
  end
52
52
  end
53
- end