tc211-termbase 0.1.13 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +13 -0
- data/.github/workflows/release.yml +23 -0
- data/.gitignore +3 -1
- data/exe/tc211-termbase-xlsx2yaml +2 -5
- data/lib/tc211/termbase/concept.rb +88 -39
- data/lib/tc211/termbase/concept_collection.rb +29 -21
- data/lib/tc211/termbase/information_sheet.rb +11 -15
- data/lib/tc211/termbase/metadata_section.rb +108 -112
- data/lib/tc211/termbase/sheet_section.rb +18 -19
- data/lib/tc211/termbase/term.rb +348 -216
- data/lib/tc211/termbase/term_workbook.rb +35 -36
- data/lib/tc211/termbase/terminology_sheet.rb +84 -74
- data/lib/tc211/termbase/terms_section.rb +9 -8
- data/lib/tc211/termbase/version.rb +1 -1
- data/lib/tc211/termbase.rb +1 -0
- data/tc211-termbase.gemspec +6 -6
- metadata +16 -29
- data/Gemfile.lock +0 -123
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 423950f471ef5d9c689a3542150d988fc997489a12b4ffe3668520d84bf6561f
|
4
|
+
data.tar.gz: f32b9f74dd1bb3310338785e58eb3488ba88656fa859fc3816e4615d9bad65a1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d4bb9c3b774042e3fe3674d30f1eb8211be2e0ac26f4834ee53646056d0b2fae276f9011776df5e7c36eae2bc450dda178ec32d03d0db9336eff50a55d16dd02
|
7
|
+
data.tar.gz: 2dd00314060b0984f353112988ecf931310ab41f8b9a2737a5e9e55fcc8756f321a24085a7823fb3dd3a6e00c1c7357b84b4abe26bcc2741b2f38f78bb22d063
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
+
# See https://github.com/metanorma/cimas
|
3
|
+
name: rake
|
4
|
+
|
5
|
+
on:
|
6
|
+
push:
|
7
|
+
branches: [ master, main ]
|
8
|
+
tags: [ v* ]
|
9
|
+
pull_request:
|
10
|
+
|
11
|
+
jobs:
|
12
|
+
rake:
|
13
|
+
uses: geolexica/ci/.github/workflows/rake.yml@main
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
+
# See https://github.com/metanorma/cimas
|
3
|
+
name: release
|
4
|
+
|
5
|
+
on:
|
6
|
+
workflow_dispatch:
|
7
|
+
inputs:
|
8
|
+
next_version:
|
9
|
+
description: |
|
10
|
+
Next release version. Possible values: x.y.z, major, minor, patch or pre|rc|etc
|
11
|
+
required: true
|
12
|
+
default: 'skip'
|
13
|
+
repository_dispatch:
|
14
|
+
types: [ do-release ]
|
15
|
+
|
16
|
+
|
17
|
+
jobs:
|
18
|
+
release:
|
19
|
+
uses: geolexica/ci/.github/workflows/release.yml@main
|
20
|
+
with:
|
21
|
+
next_version: ${{ github.event.inputs.next_version }}
|
22
|
+
secrets:
|
23
|
+
rubygems-api-key: ${{ secrets.GEOLEXICA_RUBYGEMS_API_KEY }}
|
data/.gitignore
CHANGED
@@ -63,15 +63,12 @@ end
|
|
63
63
|
|
64
64
|
# collection[1206].inspect
|
65
65
|
|
66
|
-
collection.to_file(File.join(output_dir, Pathname.new(filepath).basename.sub_ext(".yaml")))
|
67
|
-
|
68
66
|
collection_output_dir = File.join(output_dir, "concepts")
|
69
67
|
|
70
68
|
FileUtils.mkdir_p(collection_output_dir)
|
71
69
|
|
72
|
-
collection.
|
73
|
-
|
74
|
-
end
|
70
|
+
concept_collection = collection.to_concept_collection
|
71
|
+
concept_collection.save_to_files(collection_output_dir)
|
75
72
|
|
76
73
|
# french = workbook.language_sheet("French")
|
77
74
|
# french.sections[3].structure
|
@@ -1,56 +1,105 @@
|
|
1
1
|
module Tc211::Termbase
|
2
|
+
class Concept < Hash
|
3
|
+
attr_reader :id
|
2
4
|
|
3
|
-
|
4
|
-
attr_accessor :id
|
5
|
-
attr_accessor :terms
|
6
|
-
DEFAULT_LANGUAGE = "eng"
|
5
|
+
DEFAULT_LANGUAGE = "eng".freeze
|
7
6
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
7
|
+
def initialize(options = {})
|
8
|
+
super
|
9
|
+
|
10
|
+
terms = options.delete(:terms) || []
|
11
|
+
terms.each do |term|
|
12
|
+
add_term(term)
|
13
|
+
end
|
14
|
+
|
15
|
+
options.each_pair do |k, v|
|
16
|
+
send("#{k}=", v)
|
17
|
+
end
|
12
18
|
end
|
13
19
|
|
14
|
-
|
15
|
-
|
20
|
+
# The concept id should ALWAYS be an integer.
|
21
|
+
# https://github.com/riboseinc/tc211-termbase/issues/1
|
22
|
+
def id=(newid)
|
23
|
+
@id = Integer(newid)
|
16
24
|
end
|
17
|
-
end
|
18
25
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
@id = Integer(newid)
|
23
|
-
end
|
26
|
+
def add_term(term)
|
27
|
+
self[term.language_code] = term
|
28
|
+
end
|
24
29
|
|
25
|
-
|
26
|
-
|
27
|
-
|
30
|
+
def terms
|
31
|
+
values
|
32
|
+
end
|
28
33
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
34
|
+
def default_term
|
35
|
+
if self[DEFAULT_LANGUAGE]
|
36
|
+
self[DEFAULT_LANGUAGE]
|
37
|
+
else
|
38
|
+
puts "[tc211-termbase] term (lang: #{keys.first}, ID: #{id}) is \
|
39
|
+
missing a corresponding English term, probably needs updating."
|
40
|
+
self[keys.first]
|
41
|
+
end
|
35
42
|
end
|
36
|
-
end
|
37
43
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
44
|
+
def to_hash
|
45
|
+
default_hash = {
|
46
|
+
"term" => default_term.term,
|
47
|
+
"termid" => id,
|
48
|
+
}
|
43
49
|
|
44
|
-
|
45
|
-
|
50
|
+
inject(default_hash) do |acc, (lang, term)|
|
51
|
+
acc.merge!(lang => term.to_hash)
|
52
|
+
end
|
46
53
|
end
|
47
|
-
end
|
48
54
|
|
49
|
-
|
50
|
-
|
51
|
-
|
55
|
+
def to_file(filename)
|
56
|
+
File.open(filename, "w") do |file|
|
57
|
+
file.write(to_hash.to_yaml)
|
58
|
+
end
|
52
59
|
end
|
53
|
-
end
|
54
60
|
|
61
|
+
def to_glossarist_concept
|
62
|
+
concept = Glossarist::ManagedConcept.new(data: { id: id.to_s })
|
63
|
+
|
64
|
+
localized_concepts = []
|
65
|
+
|
66
|
+
terms.map do |term|
|
67
|
+
next if term.nil?
|
68
|
+
|
69
|
+
localized_concepts << term.to_localized_concept_hash
|
70
|
+
end
|
71
|
+
|
72
|
+
concept.localized_concepts = localized_concepts
|
73
|
+
|
74
|
+
concept
|
75
|
+
end
|
76
|
+
end
|
55
77
|
end
|
56
|
-
|
78
|
+
|
79
|
+
# term: abbreviation
|
80
|
+
# termid: 2
|
81
|
+
# eng:
|
82
|
+
# id: 2
|
83
|
+
# term: abbreviation
|
84
|
+
# definition: designation formed by omitting words or letters from a longer
|
85
|
+
# form and designating the same concept
|
86
|
+
# language_code: eng
|
87
|
+
# notes: []
|
88
|
+
# examples: []
|
89
|
+
# entry_status: valid
|
90
|
+
# classification: preferred
|
91
|
+
# authoritative_source:
|
92
|
+
# ref: ISO 1087-1:2000
|
93
|
+
# clause: 3.4.9
|
94
|
+
# link: https://www.iso.org/standard/20057.html
|
95
|
+
# lineage_source: ISO/TS 19104:2008
|
96
|
+
# lineage_source_similarity: 1
|
97
|
+
# date_accepted: 2008-11-15 00:00:00.000000000 +08:00
|
98
|
+
# review_date: 2013-01-29 00:00:00.000000000 +08:00
|
99
|
+
# review_status: final
|
100
|
+
# review_decision: accepted
|
101
|
+
# review_decision_date: 2016-10-01 00:00:00.000000000 +08:00
|
102
|
+
# review_decision_event: Publication of ISO 19104:2016
|
103
|
+
# review_decision_notes: Authoritative reference changed from ISO 1087-1:2000
|
104
|
+
# to ISO 1087-1:2000, 3.4.9. Lineage source added as ISO/TS 19104:2008
|
105
|
+
# release: '2'
|
@@ -1,32 +1,40 @@
|
|
1
1
|
require_relative "concept"
|
2
2
|
|
3
3
|
module Tc211::Termbase
|
4
|
+
class ConceptCollection < Hash
|
5
|
+
def add_term(term)
|
6
|
+
if self[term.id]
|
7
|
+
self[term.id].add_term(term)
|
8
|
+
else
|
9
|
+
self[term.id] = Concept.new(
|
10
|
+
id: term.id,
|
11
|
+
terms: [term],
|
12
|
+
)
|
13
|
+
end
|
14
|
+
end
|
4
15
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
self[term.id].add_term(term)
|
10
|
-
else
|
11
|
-
self[term.id] = Concept.new(
|
12
|
-
id: term.id,
|
13
|
-
terms: [term]
|
14
|
-
)
|
16
|
+
def to_hash
|
17
|
+
inject({}) do |acc, (id, concept)|
|
18
|
+
acc.merge!(id => concept.to_hash)
|
19
|
+
end
|
15
20
|
end
|
16
|
-
end
|
17
21
|
|
18
|
-
|
19
|
-
|
20
|
-
|
22
|
+
def to_file(filename)
|
23
|
+
File.open(filename, "w") do |file|
|
24
|
+
file.write(to_hash.to_yaml)
|
25
|
+
end
|
21
26
|
end
|
22
|
-
end
|
23
27
|
|
24
|
-
|
25
|
-
|
26
|
-
|
28
|
+
def to_concept_collection
|
29
|
+
collection = Glossarist::ManagedConceptCollection.new
|
30
|
+
|
31
|
+
values.each do |term_concept|
|
32
|
+
next if term_concept.nil?
|
33
|
+
|
34
|
+
collection.store(term_concept.to_glossarist_concept)
|
35
|
+
end
|
36
|
+
|
37
|
+
collection
|
27
38
|
end
|
28
39
|
end
|
29
|
-
|
30
40
|
end
|
31
|
-
|
32
|
-
end
|
@@ -1,22 +1,18 @@
|
|
1
1
|
require_relative "terminology_sheet"
|
2
2
|
|
3
3
|
module Tc211::Termbase
|
4
|
+
class InformationSheet < TerminologySheet
|
5
|
+
def metadata_section
|
6
|
+
sheet_array = @sheet.simple_rows.to_a
|
7
|
+
MetadataSection.new(sheet_array)
|
8
|
+
end
|
4
9
|
|
5
|
-
|
10
|
+
def to_hash
|
11
|
+
{ "glossary" => metadata_section.to_hash }
|
12
|
+
end
|
6
13
|
|
7
|
-
|
8
|
-
|
9
|
-
|
14
|
+
def to_yaml
|
15
|
+
to_hash.to_yaml
|
16
|
+
end
|
10
17
|
end
|
11
|
-
|
12
|
-
def to_hash
|
13
|
-
{ "glossary" => metadata_section.to_hash }
|
14
|
-
end
|
15
|
-
|
16
|
-
def to_yaml
|
17
|
-
to_hash.to_yaml
|
18
|
-
end
|
19
|
-
|
20
18
|
end
|
21
|
-
|
22
|
-
end
|
@@ -1,136 +1,132 @@
|
|
1
1
|
require_relative "sheet_section"
|
2
2
|
|
3
3
|
module Tc211::Termbase
|
4
|
+
class MetadataSection < SheetSection
|
5
|
+
attr_accessor :header_row
|
6
|
+
attr_writer :attributes
|
7
|
+
|
8
|
+
GLOSSARY_HEADER_ROW_MATCH = {
|
9
|
+
# "English" uses "".
|
10
|
+
# "Arabic" uses "A".
|
11
|
+
# This is fixed in the MLGT as of 2018 Aug 6.
|
12
|
+
"A" => [nil, "Item", "A"],
|
13
|
+
|
14
|
+
"C" => ["Data Type"],
|
15
|
+
"D" => ["Special Instruction"],
|
16
|
+
|
17
|
+
# "Malay" has it empty ("")
|
18
|
+
# This is fixed in the MLGT as of 2018 Aug 6.
|
19
|
+
"E" => ["ISO 19135 Class.attribute", nil],
|
20
|
+
|
21
|
+
"F" => ["Domain"],
|
22
|
+
}.freeze
|
23
|
+
|
24
|
+
GLOSSARY_ROW_KEY_MAP = {
|
25
|
+
"A" => "name",
|
26
|
+
"B" => "value",
|
27
|
+
"C" => "datatype",
|
28
|
+
"D" => "special-instruction",
|
29
|
+
"E" => "19135-class-attribute",
|
30
|
+
"F" => "value-domain",
|
31
|
+
}.freeze
|
32
|
+
|
33
|
+
def initialize(rows, options = {})
|
34
|
+
super
|
35
|
+
|
36
|
+
self.class.match_header(@rows[0])
|
37
|
+
@header_row = @rows[0]
|
38
|
+
@body_rows = @rows[1..-1]
|
39
|
+
attributes
|
40
|
+
self
|
41
|
+
end
|
4
42
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
"C" => ["Data Type"],
|
16
|
-
"D" => ["Special Instruction"],
|
17
|
-
|
18
|
-
# "Malay" has it empty ("")
|
19
|
-
# This is fixed in the MLGT as of 2018 Aug 6.
|
20
|
-
"E" => ["ISO 19135 Class.attribute", nil],
|
21
|
-
|
22
|
-
"F" => ["Domain"]
|
23
|
-
}
|
24
|
-
|
25
|
-
GLOSSARY_ROW_KEY_MAP = {
|
26
|
-
"A" => "name",
|
27
|
-
"B" => "value",
|
28
|
-
"C" => "datatype",
|
29
|
-
"D" => "special-instruction",
|
30
|
-
"E" => "19135-class-attribute",
|
31
|
-
"F" => "value-domain"
|
32
|
-
}
|
33
|
-
|
34
|
-
def initialize(rows, options={})
|
35
|
-
super
|
36
|
-
|
37
|
-
self.class.match_header(@rows[0])
|
38
|
-
@header_row = @rows[0]
|
39
|
-
@body_rows = @rows[1..-1]
|
40
|
-
attributes
|
41
|
-
self
|
42
|
-
end
|
43
|
-
|
44
|
-
def self.match_header(columns)
|
45
|
-
# puts "row #{row}"
|
46
|
-
columns.each do |key, value|
|
47
|
-
# puts "#{key}, #{value}"
|
48
|
-
if GLOSSARY_HEADER_ROW_MATCH[key]
|
49
|
-
unless GLOSSARY_HEADER_ROW_MATCH[key].include?(value)
|
50
|
-
raise RowHeaderMatchError.new("Metadata section header for column `#{key}` does not match expected value `#{value}`")
|
43
|
+
def self.match_header(columns)
|
44
|
+
# puts "row #{row}"
|
45
|
+
columns.each do |key, value|
|
46
|
+
# puts "#{key}, #{value}"
|
47
|
+
header_row_match = GLOSSARY_HEADER_ROW_MATCH[key]
|
48
|
+
if header_row_match && !header_row_match.include?(value)
|
49
|
+
raise RowHeaderMatchError.new(
|
50
|
+
"Metadata section header for column `#{key}` does not match \
|
51
|
+
expected value `#{value}`",
|
52
|
+
)
|
51
53
|
end
|
52
54
|
end
|
53
55
|
end
|
54
|
-
end
|
55
|
-
|
56
56
|
|
57
|
-
|
58
|
-
|
59
|
-
|
57
|
+
def structure
|
58
|
+
GLOSSARY_ROW_KEY_MAP
|
59
|
+
end
|
60
60
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
61
|
+
def clean_key(key)
|
62
|
+
key.strip
|
63
|
+
.downcase
|
64
|
+
.gsub(/[()]/, "")
|
65
|
+
.gsub(" ", "-")
|
66
|
+
end
|
67
67
|
|
68
|
-
|
69
|
-
|
68
|
+
def clean_value(value)
|
69
|
+
return nil if value.nil?
|
70
70
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
71
|
+
case value
|
72
|
+
when String
|
73
|
+
value.strip
|
74
|
+
else
|
75
|
+
value
|
76
|
+
end
|
76
77
|
end
|
77
|
-
end
|
78
78
|
|
79
|
-
|
80
|
-
|
81
|
-
attribute = {}
|
79
|
+
def parse_row(row)
|
80
|
+
return nil if row.empty?
|
82
81
|
|
83
|
-
|
84
|
-
# puts"#{key}, #{value}, #{row[key]}"
|
85
|
-
attribute_key = clean_key(value)
|
86
|
-
attribute_value = clean_value(row[key])
|
87
|
-
next if attribute_value.nil?
|
88
|
-
attribute[attribute_key] = attribute_value
|
89
|
-
end
|
82
|
+
attribute = {}
|
90
83
|
|
91
|
-
|
92
|
-
|
84
|
+
structure.each_pair do |key, value|
|
85
|
+
attribute_value = clean_value(row[key])
|
86
|
+
next if attribute_value.nil?
|
93
87
|
|
94
|
-
|
95
|
-
|
88
|
+
attribute[clean_key(value)] = attribute_value
|
89
|
+
end
|
96
90
|
|
97
|
-
|
98
|
-
|
91
|
+
# TODO: "Chinese" name is empty!
|
92
|
+
key = clean_key(attribute["name"] || "(empty)")
|
99
93
|
|
100
|
-
|
101
|
-
@body_rows.each do |row|
|
102
|
-
result = parse_row(row)
|
103
|
-
@attributes.merge!(result) if result
|
94
|
+
{ key => attribute }
|
104
95
|
end
|
105
|
-
@attributes
|
106
|
-
end
|
107
96
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
# "value-domain"=>
|
118
|
-
# "<<Data Type>>RE_Locale.country \n" +
|
119
|
-
# "[ISO 3166-1 3-character numeric country code]"},
|
120
|
-
#
|
121
|
-
|
122
|
-
attributes.inject({}) do |acc, (k, v)|
|
123
|
-
acc.merge({
|
124
|
-
k => v["value"]
|
125
|
-
})
|
97
|
+
def attributes
|
98
|
+
return @attributes if @attributes
|
99
|
+
|
100
|
+
@attributes = {}
|
101
|
+
@body_rows.each do |row|
|
102
|
+
result = parse_row(row)
|
103
|
+
@attributes.merge!(result) if result
|
104
|
+
end
|
105
|
+
@attributes
|
126
106
|
end
|
127
|
-
end
|
128
107
|
|
129
|
-
|
130
|
-
|
131
|
-
"
|
132
|
-
|
133
|
-
|
108
|
+
def fields
|
109
|
+
# "operating-language-country"=>
|
110
|
+
# {"name"=>"Operating Language Country",
|
111
|
+
# "value"=>"410",
|
112
|
+
# "datatype"=>"Country Code",
|
113
|
+
# "special-instruction"=>
|
114
|
+
# "ftp.ics.uci.edu/pub/ietf/http/related/iso3166.txt",
|
115
|
+
# "19135-class-attribute"=>"RE_Register.operatingLanguage",
|
116
|
+
# "value-domain"=>
|
117
|
+
# "<<Data Type>>RE_Locale.country \n" +
|
118
|
+
# "[ISO 3166-1 3-character numeric country code]"},
|
119
|
+
#
|
120
|
+
|
121
|
+
attributes.inject({}) do |acc, (k, v)|
|
122
|
+
acc.merge({ k => v["value"] })
|
123
|
+
end
|
124
|
+
end
|
134
125
|
|
135
|
-
|
126
|
+
def to_hash
|
127
|
+
{
|
128
|
+
"metadata" => fields,
|
129
|
+
}
|
130
|
+
end
|
131
|
+
end
|
136
132
|
end
|
@@ -1,30 +1,29 @@
|
|
1
1
|
|
2
2
|
module Tc211::Termbase
|
3
|
+
class SheetSection
|
3
4
|
|
4
|
-
class
|
5
|
+
class RowHeaderMatchError < StandardError; end
|
5
6
|
|
6
|
-
|
7
|
-
class UnknownHeaderError < StandardError; end
|
7
|
+
class UnknownHeaderError < StandardError; end
|
8
8
|
|
9
|
-
|
9
|
+
attr_accessor :sheet_content
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
@rows = rows
|
15
|
-
# @has_header = options[:has_header].nil? ? true : options[:has_header]
|
16
|
-
self
|
17
|
-
end
|
11
|
+
def initialize(rows, _options = {})
|
12
|
+
# rows is an array of rows!
|
13
|
+
raise unless rows.is_a?(Array)
|
18
14
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
15
|
+
@rows = rows
|
16
|
+
# @has_header = options[:has_header].nil? ? true : options[:has_header]
|
17
|
+
self
|
18
|
+
end
|
23
19
|
|
24
|
-
|
20
|
+
# Abstract method
|
21
|
+
def self.match_header(_row)
|
22
|
+
false
|
23
|
+
end
|
25
24
|
|
26
|
-
|
25
|
+
def self.identify_type(_row); end
|
27
26
|
|
28
|
-
|
27
|
+
# TODO
|
28
|
+
end
|
29
29
|
end
|
30
|
-
end
|