tc211-termbase 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +13 -0
- data/.github/workflows/release.yml +23 -0
- data/.gitignore +3 -1
- data/exe/tc211-termbase-xlsx2yaml +2 -5
- data/lib/tc211/termbase/concept.rb +88 -39
- data/lib/tc211/termbase/concept_collection.rb +29 -21
- data/lib/tc211/termbase/information_sheet.rb +11 -15
- data/lib/tc211/termbase/metadata_section.rb +108 -112
- data/lib/tc211/termbase/sheet_section.rb +18 -19
- data/lib/tc211/termbase/term.rb +341 -230
- data/lib/tc211/termbase/term_workbook.rb +35 -36
- data/lib/tc211/termbase/terminology_sheet.rb +84 -74
- data/lib/tc211/termbase/terms_section.rb +9 -8
- data/lib/tc211/termbase/version.rb +1 -1
- data/lib/tc211/termbase.rb +1 -0
- data/tc211-termbase.gemspec +6 -6
- metadata +20 -39
- data/Gemfile.lock +0 -122
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 423950f471ef5d9c689a3542150d988fc997489a12b4ffe3668520d84bf6561f
|
4
|
+
data.tar.gz: f32b9f74dd1bb3310338785e58eb3488ba88656fa859fc3816e4615d9bad65a1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d4bb9c3b774042e3fe3674d30f1eb8211be2e0ac26f4834ee53646056d0b2fae276f9011776df5e7c36eae2bc450dda178ec32d03d0db9336eff50a55d16dd02
|
7
|
+
data.tar.gz: 2dd00314060b0984f353112988ecf931310ab41f8b9a2737a5e9e55fcc8756f321a24085a7823fb3dd3a6e00c1c7357b84b4abe26bcc2741b2f38f78bb22d063
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
+
# See https://github.com/metanorma/cimas
|
3
|
+
name: rake
|
4
|
+
|
5
|
+
on:
|
6
|
+
push:
|
7
|
+
branches: [ master, main ]
|
8
|
+
tags: [ v* ]
|
9
|
+
pull_request:
|
10
|
+
|
11
|
+
jobs:
|
12
|
+
rake:
|
13
|
+
uses: geolexica/ci/.github/workflows/rake.yml@main
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
+
# See https://github.com/metanorma/cimas
|
3
|
+
name: release
|
4
|
+
|
5
|
+
on:
|
6
|
+
workflow_dispatch:
|
7
|
+
inputs:
|
8
|
+
next_version:
|
9
|
+
description: |
|
10
|
+
Next release version. Possible values: x.y.z, major, minor, patch or pre|rc|etc
|
11
|
+
required: true
|
12
|
+
default: 'skip'
|
13
|
+
repository_dispatch:
|
14
|
+
types: [ do-release ]
|
15
|
+
|
16
|
+
|
17
|
+
jobs:
|
18
|
+
release:
|
19
|
+
uses: geolexica/ci/.github/workflows/release.yml@main
|
20
|
+
with:
|
21
|
+
next_version: ${{ github.event.inputs.next_version }}
|
22
|
+
secrets:
|
23
|
+
rubygems-api-key: ${{ secrets.GEOLEXICA_RUBYGEMS_API_KEY }}
|
data/.gitignore
CHANGED
@@ -63,15 +63,12 @@ end
|
|
63
63
|
|
64
64
|
# collection[1206].inspect
|
65
65
|
|
66
|
-
collection.to_file(File.join(output_dir, Pathname.new(filepath).basename.sub_ext(".yaml")))
|
67
|
-
|
68
66
|
collection_output_dir = File.join(output_dir, "concepts")
|
69
67
|
|
70
68
|
FileUtils.mkdir_p(collection_output_dir)
|
71
69
|
|
72
|
-
collection.
|
73
|
-
|
74
|
-
end
|
70
|
+
concept_collection = collection.to_concept_collection
|
71
|
+
concept_collection.save_to_files(collection_output_dir)
|
75
72
|
|
76
73
|
# french = workbook.language_sheet("French")
|
77
74
|
# french.sections[3].structure
|
@@ -1,56 +1,105 @@
|
|
1
1
|
module Tc211::Termbase
|
2
|
+
class Concept < Hash
|
3
|
+
attr_reader :id
|
2
4
|
|
3
|
-
|
4
|
-
attr_accessor :id
|
5
|
-
attr_accessor :terms
|
6
|
-
DEFAULT_LANGUAGE = "eng"
|
5
|
+
DEFAULT_LANGUAGE = "eng".freeze
|
7
6
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
7
|
+
def initialize(options = {})
|
8
|
+
super
|
9
|
+
|
10
|
+
terms = options.delete(:terms) || []
|
11
|
+
terms.each do |term|
|
12
|
+
add_term(term)
|
13
|
+
end
|
14
|
+
|
15
|
+
options.each_pair do |k, v|
|
16
|
+
send("#{k}=", v)
|
17
|
+
end
|
12
18
|
end
|
13
19
|
|
14
|
-
|
15
|
-
|
20
|
+
# The concept id should ALWAYS be an integer.
|
21
|
+
# https://github.com/riboseinc/tc211-termbase/issues/1
|
22
|
+
def id=(newid)
|
23
|
+
@id = Integer(newid)
|
16
24
|
end
|
17
|
-
end
|
18
25
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
@id = Integer(newid)
|
23
|
-
end
|
26
|
+
def add_term(term)
|
27
|
+
self[term.language_code] = term
|
28
|
+
end
|
24
29
|
|
25
|
-
|
26
|
-
|
27
|
-
|
30
|
+
def terms
|
31
|
+
values
|
32
|
+
end
|
28
33
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
34
|
+
def default_term
|
35
|
+
if self[DEFAULT_LANGUAGE]
|
36
|
+
self[DEFAULT_LANGUAGE]
|
37
|
+
else
|
38
|
+
puts "[tc211-termbase] term (lang: #{keys.first}, ID: #{id}) is \
|
39
|
+
missing a corresponding English term, probably needs updating."
|
40
|
+
self[keys.first]
|
41
|
+
end
|
35
42
|
end
|
36
|
-
end
|
37
43
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
44
|
+
def to_hash
|
45
|
+
default_hash = {
|
46
|
+
"term" => default_term.term,
|
47
|
+
"termid" => id,
|
48
|
+
}
|
43
49
|
|
44
|
-
|
45
|
-
|
50
|
+
inject(default_hash) do |acc, (lang, term)|
|
51
|
+
acc.merge!(lang => term.to_hash)
|
52
|
+
end
|
46
53
|
end
|
47
|
-
end
|
48
54
|
|
49
|
-
|
50
|
-
|
51
|
-
|
55
|
+
def to_file(filename)
|
56
|
+
File.open(filename, "w") do |file|
|
57
|
+
file.write(to_hash.to_yaml)
|
58
|
+
end
|
52
59
|
end
|
53
|
-
end
|
54
60
|
|
61
|
+
def to_glossarist_concept
|
62
|
+
concept = Glossarist::ManagedConcept.new(data: { id: id.to_s })
|
63
|
+
|
64
|
+
localized_concepts = []
|
65
|
+
|
66
|
+
terms.map do |term|
|
67
|
+
next if term.nil?
|
68
|
+
|
69
|
+
localized_concepts << term.to_localized_concept_hash
|
70
|
+
end
|
71
|
+
|
72
|
+
concept.localized_concepts = localized_concepts
|
73
|
+
|
74
|
+
concept
|
75
|
+
end
|
76
|
+
end
|
55
77
|
end
|
56
|
-
|
78
|
+
|
79
|
+
# term: abbreviation
|
80
|
+
# termid: 2
|
81
|
+
# eng:
|
82
|
+
# id: 2
|
83
|
+
# term: abbreviation
|
84
|
+
# definition: designation formed by omitting words or letters from a longer
|
85
|
+
# form and designating the same concept
|
86
|
+
# language_code: eng
|
87
|
+
# notes: []
|
88
|
+
# examples: []
|
89
|
+
# entry_status: valid
|
90
|
+
# classification: preferred
|
91
|
+
# authoritative_source:
|
92
|
+
# ref: ISO 1087-1:2000
|
93
|
+
# clause: 3.4.9
|
94
|
+
# link: https://www.iso.org/standard/20057.html
|
95
|
+
# lineage_source: ISO/TS 19104:2008
|
96
|
+
# lineage_source_similarity: 1
|
97
|
+
# date_accepted: 2008-11-15 00:00:00.000000000 +08:00
|
98
|
+
# review_date: 2013-01-29 00:00:00.000000000 +08:00
|
99
|
+
# review_status: final
|
100
|
+
# review_decision: accepted
|
101
|
+
# review_decision_date: 2016-10-01 00:00:00.000000000 +08:00
|
102
|
+
# review_decision_event: Publication of ISO 19104:2016
|
103
|
+
# review_decision_notes: Authoritative reference changed from ISO 1087-1:2000
|
104
|
+
# to ISO 1087-1:2000, 3.4.9. Lineage source added as ISO/TS 19104:2008
|
105
|
+
# release: '2'
|
@@ -1,32 +1,40 @@
|
|
1
1
|
require_relative "concept"
|
2
2
|
|
3
3
|
module Tc211::Termbase
|
4
|
+
class ConceptCollection < Hash
|
5
|
+
def add_term(term)
|
6
|
+
if self[term.id]
|
7
|
+
self[term.id].add_term(term)
|
8
|
+
else
|
9
|
+
self[term.id] = Concept.new(
|
10
|
+
id: term.id,
|
11
|
+
terms: [term],
|
12
|
+
)
|
13
|
+
end
|
14
|
+
end
|
4
15
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
self[term.id].add_term(term)
|
10
|
-
else
|
11
|
-
self[term.id] = Concept.new(
|
12
|
-
id: term.id,
|
13
|
-
terms: [term]
|
14
|
-
)
|
16
|
+
def to_hash
|
17
|
+
inject({}) do |acc, (id, concept)|
|
18
|
+
acc.merge!(id => concept.to_hash)
|
19
|
+
end
|
15
20
|
end
|
16
|
-
end
|
17
21
|
|
18
|
-
|
19
|
-
|
20
|
-
|
22
|
+
def to_file(filename)
|
23
|
+
File.open(filename, "w") do |file|
|
24
|
+
file.write(to_hash.to_yaml)
|
25
|
+
end
|
21
26
|
end
|
22
|
-
end
|
23
27
|
|
24
|
-
|
25
|
-
|
26
|
-
|
28
|
+
def to_concept_collection
|
29
|
+
collection = Glossarist::ManagedConceptCollection.new
|
30
|
+
|
31
|
+
values.each do |term_concept|
|
32
|
+
next if term_concept.nil?
|
33
|
+
|
34
|
+
collection.store(term_concept.to_glossarist_concept)
|
35
|
+
end
|
36
|
+
|
37
|
+
collection
|
27
38
|
end
|
28
39
|
end
|
29
|
-
|
30
40
|
end
|
31
|
-
|
32
|
-
end
|
@@ -1,22 +1,18 @@
|
|
1
1
|
require_relative "terminology_sheet"
|
2
2
|
|
3
3
|
module Tc211::Termbase
|
4
|
+
class InformationSheet < TerminologySheet
|
5
|
+
def metadata_section
|
6
|
+
sheet_array = @sheet.simple_rows.to_a
|
7
|
+
MetadataSection.new(sheet_array)
|
8
|
+
end
|
4
9
|
|
5
|
-
|
10
|
+
def to_hash
|
11
|
+
{ "glossary" => metadata_section.to_hash }
|
12
|
+
end
|
6
13
|
|
7
|
-
|
8
|
-
|
9
|
-
|
14
|
+
def to_yaml
|
15
|
+
to_hash.to_yaml
|
16
|
+
end
|
10
17
|
end
|
11
|
-
|
12
|
-
def to_hash
|
13
|
-
{ "glossary" => metadata_section.to_hash }
|
14
|
-
end
|
15
|
-
|
16
|
-
def to_yaml
|
17
|
-
to_hash.to_yaml
|
18
|
-
end
|
19
|
-
|
20
18
|
end
|
21
|
-
|
22
|
-
end
|
@@ -1,136 +1,132 @@
|
|
1
1
|
require_relative "sheet_section"
|
2
2
|
|
3
3
|
module Tc211::Termbase
|
4
|
+
class MetadataSection < SheetSection
|
5
|
+
attr_accessor :header_row
|
6
|
+
attr_writer :attributes
|
7
|
+
|
8
|
+
GLOSSARY_HEADER_ROW_MATCH = {
|
9
|
+
# "English" uses "".
|
10
|
+
# "Arabic" uses "A".
|
11
|
+
# This is fixed in the MLGT as of 2018 Aug 6.
|
12
|
+
"A" => [nil, "Item", "A"],
|
13
|
+
|
14
|
+
"C" => ["Data Type"],
|
15
|
+
"D" => ["Special Instruction"],
|
16
|
+
|
17
|
+
# "Malay" has it empty ("")
|
18
|
+
# This is fixed in the MLGT as of 2018 Aug 6.
|
19
|
+
"E" => ["ISO 19135 Class.attribute", nil],
|
20
|
+
|
21
|
+
"F" => ["Domain"],
|
22
|
+
}.freeze
|
23
|
+
|
24
|
+
GLOSSARY_ROW_KEY_MAP = {
|
25
|
+
"A" => "name",
|
26
|
+
"B" => "value",
|
27
|
+
"C" => "datatype",
|
28
|
+
"D" => "special-instruction",
|
29
|
+
"E" => "19135-class-attribute",
|
30
|
+
"F" => "value-domain",
|
31
|
+
}.freeze
|
32
|
+
|
33
|
+
def initialize(rows, options = {})
|
34
|
+
super
|
35
|
+
|
36
|
+
self.class.match_header(@rows[0])
|
37
|
+
@header_row = @rows[0]
|
38
|
+
@body_rows = @rows[1..-1]
|
39
|
+
attributes
|
40
|
+
self
|
41
|
+
end
|
4
42
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
"C" => ["Data Type"],
|
16
|
-
"D" => ["Special Instruction"],
|
17
|
-
|
18
|
-
# "Malay" has it empty ("")
|
19
|
-
# This is fixed in the MLGT as of 2018 Aug 6.
|
20
|
-
"E" => ["ISO 19135 Class.attribute", nil],
|
21
|
-
|
22
|
-
"F" => ["Domain"]
|
23
|
-
}
|
24
|
-
|
25
|
-
GLOSSARY_ROW_KEY_MAP = {
|
26
|
-
"A" => "name",
|
27
|
-
"B" => "value",
|
28
|
-
"C" => "datatype",
|
29
|
-
"D" => "special-instruction",
|
30
|
-
"E" => "19135-class-attribute",
|
31
|
-
"F" => "value-domain"
|
32
|
-
}
|
33
|
-
|
34
|
-
def initialize(rows, options={})
|
35
|
-
super
|
36
|
-
|
37
|
-
self.class.match_header(@rows[0])
|
38
|
-
@header_row = @rows[0]
|
39
|
-
@body_rows = @rows[1..-1]
|
40
|
-
attributes
|
41
|
-
self
|
42
|
-
end
|
43
|
-
|
44
|
-
def self.match_header(columns)
|
45
|
-
# puts "row #{row}"
|
46
|
-
columns.each do |key, value|
|
47
|
-
# puts "#{key}, #{value}"
|
48
|
-
if GLOSSARY_HEADER_ROW_MATCH[key]
|
49
|
-
unless GLOSSARY_HEADER_ROW_MATCH[key].include?(value)
|
50
|
-
raise RowHeaderMatchError.new("Metadata section header for column `#{key}` does not match expected value `#{value}`")
|
43
|
+
def self.match_header(columns)
|
44
|
+
# puts "row #{row}"
|
45
|
+
columns.each do |key, value|
|
46
|
+
# puts "#{key}, #{value}"
|
47
|
+
header_row_match = GLOSSARY_HEADER_ROW_MATCH[key]
|
48
|
+
if header_row_match && !header_row_match.include?(value)
|
49
|
+
raise RowHeaderMatchError.new(
|
50
|
+
"Metadata section header for column `#{key}` does not match \
|
51
|
+
expected value `#{value}`",
|
52
|
+
)
|
51
53
|
end
|
52
54
|
end
|
53
55
|
end
|
54
|
-
end
|
55
|
-
|
56
56
|
|
57
|
-
|
58
|
-
|
59
|
-
|
57
|
+
def structure
|
58
|
+
GLOSSARY_ROW_KEY_MAP
|
59
|
+
end
|
60
60
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
61
|
+
def clean_key(key)
|
62
|
+
key.strip
|
63
|
+
.downcase
|
64
|
+
.gsub(/[()]/, "")
|
65
|
+
.gsub(" ", "-")
|
66
|
+
end
|
67
67
|
|
68
|
-
|
69
|
-
|
68
|
+
def clean_value(value)
|
69
|
+
return nil if value.nil?
|
70
70
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
71
|
+
case value
|
72
|
+
when String
|
73
|
+
value.strip
|
74
|
+
else
|
75
|
+
value
|
76
|
+
end
|
76
77
|
end
|
77
|
-
end
|
78
78
|
|
79
|
-
|
80
|
-
|
81
|
-
attribute = {}
|
79
|
+
def parse_row(row)
|
80
|
+
return nil if row.empty?
|
82
81
|
|
83
|
-
|
84
|
-
# puts"#{key}, #{value}, #{row[key]}"
|
85
|
-
attribute_key = clean_key(value)
|
86
|
-
attribute_value = clean_value(row[key])
|
87
|
-
next if attribute_value.nil?
|
88
|
-
attribute[attribute_key] = attribute_value
|
89
|
-
end
|
82
|
+
attribute = {}
|
90
83
|
|
91
|
-
|
92
|
-
|
84
|
+
structure.each_pair do |key, value|
|
85
|
+
attribute_value = clean_value(row[key])
|
86
|
+
next if attribute_value.nil?
|
93
87
|
|
94
|
-
|
95
|
-
|
88
|
+
attribute[clean_key(value)] = attribute_value
|
89
|
+
end
|
96
90
|
|
97
|
-
|
98
|
-
|
91
|
+
# TODO: "Chinese" name is empty!
|
92
|
+
key = clean_key(attribute["name"] || "(empty)")
|
99
93
|
|
100
|
-
|
101
|
-
@body_rows.each do |row|
|
102
|
-
result = parse_row(row)
|
103
|
-
@attributes.merge!(result) if result
|
94
|
+
{ key => attribute }
|
104
95
|
end
|
105
|
-
@attributes
|
106
|
-
end
|
107
96
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
# "value-domain"=>
|
118
|
-
# "<<Data Type>>RE_Locale.country \n" +
|
119
|
-
# "[ISO 3166-1 3-character numeric country code]"},
|
120
|
-
#
|
121
|
-
|
122
|
-
attributes.inject({}) do |acc, (k, v)|
|
123
|
-
acc.merge({
|
124
|
-
k => v["value"]
|
125
|
-
})
|
97
|
+
def attributes
|
98
|
+
return @attributes if @attributes
|
99
|
+
|
100
|
+
@attributes = {}
|
101
|
+
@body_rows.each do |row|
|
102
|
+
result = parse_row(row)
|
103
|
+
@attributes.merge!(result) if result
|
104
|
+
end
|
105
|
+
@attributes
|
126
106
|
end
|
127
|
-
end
|
128
107
|
|
129
|
-
|
130
|
-
|
131
|
-
"
|
132
|
-
|
133
|
-
|
108
|
+
def fields
|
109
|
+
# "operating-language-country"=>
|
110
|
+
# {"name"=>"Operating Language Country",
|
111
|
+
# "value"=>"410",
|
112
|
+
# "datatype"=>"Country Code",
|
113
|
+
# "special-instruction"=>
|
114
|
+
# "ftp.ics.uci.edu/pub/ietf/http/related/iso3166.txt",
|
115
|
+
# "19135-class-attribute"=>"RE_Register.operatingLanguage",
|
116
|
+
# "value-domain"=>
|
117
|
+
# "<<Data Type>>RE_Locale.country \n" +
|
118
|
+
# "[ISO 3166-1 3-character numeric country code]"},
|
119
|
+
#
|
120
|
+
|
121
|
+
attributes.inject({}) do |acc, (k, v)|
|
122
|
+
acc.merge({ k => v["value"] })
|
123
|
+
end
|
124
|
+
end
|
134
125
|
|
135
|
-
|
126
|
+
def to_hash
|
127
|
+
{
|
128
|
+
"metadata" => fields,
|
129
|
+
}
|
130
|
+
end
|
131
|
+
end
|
136
132
|
end
|
@@ -1,30 +1,29 @@
|
|
1
1
|
|
2
2
|
module Tc211::Termbase
|
3
|
+
class SheetSection
|
3
4
|
|
4
|
-
class
|
5
|
+
class RowHeaderMatchError < StandardError; end
|
5
6
|
|
6
|
-
|
7
|
-
class UnknownHeaderError < StandardError; end
|
7
|
+
class UnknownHeaderError < StandardError; end
|
8
8
|
|
9
|
-
|
9
|
+
attr_accessor :sheet_content
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
@rows = rows
|
15
|
-
# @has_header = options[:has_header].nil? ? true : options[:has_header]
|
16
|
-
self
|
17
|
-
end
|
11
|
+
def initialize(rows, _options = {})
|
12
|
+
# rows is an array of rows!
|
13
|
+
raise unless rows.is_a?(Array)
|
18
14
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
15
|
+
@rows = rows
|
16
|
+
# @has_header = options[:has_header].nil? ? true : options[:has_header]
|
17
|
+
self
|
18
|
+
end
|
23
19
|
|
24
|
-
|
20
|
+
# Abstract method
|
21
|
+
def self.match_header(_row)
|
22
|
+
false
|
23
|
+
end
|
25
24
|
|
26
|
-
|
25
|
+
def self.identify_type(_row); end
|
27
26
|
|
28
|
-
|
27
|
+
# TODO
|
28
|
+
end
|
29
29
|
end
|
30
|
-
end
|