tc211-termbase 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c9a0aebdaae6417ffe714134f7ff97650f3a49d391e2f5d3ec9181b9a62b1560
4
- data.tar.gz: aad88b45899338481d5b1f5e422ea06b2d7181d634f251395d09f0ed49e75f71
3
+ metadata.gz: e1b3d104ef64dd44e3183124f12c5fe0ac263027c88d5ad270005b7d41f96928
4
+ data.tar.gz: df503bb5bbcfed34f3fc9a066e90c40fb46cace48dbf544b7c194d1e8fded62b
5
5
  SHA512:
6
- metadata.gz: e685a0c0abbf203c6219fc567b4f252e68cfa97047794570fd2c9c651c65d64539a13ded1e34e7010afefdeafc280d1ca21d3dd2573c673a5f56055d40b6bfd8
7
- data.tar.gz: 0dcceeafb92e359c36d0627094d9fa20702d220e752eeba6b9e5d75f1d6316e5f3566aae541c71be1301072f1c1f82a1846e0b126ca5f7d3c31fb77f411d70d4
6
+ metadata.gz: 8c9b347c742fa57367d62785b60f92e4692db8439c134be632b6389104507ae0e6c7858a14a333ef7a90febe4b33b8fc62dbe7d4e8d6eee53cbaeb93864859d4
7
+ data.tar.gz: ebeda081cbc25e5b1aeb401346957cd7cca8ec5731229582e601ee526dbaf1f23256c1a51b4b09191148b53255118db4258a9044373a4cc4c528444291367b29
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- tc211-termbase (0.1.10)
4
+ tc211-termbase (0.1.11)
5
5
  creek
6
6
  iso-639
7
7
  relaton (~> 0.5)
@@ -22,30 +22,47 @@ end
22
22
 
23
23
 
24
24
  workbook = Tc211::Termbase::TermWorkbook.new(filepath)
25
- workbook.glossary_info.metadata_section.structure
26
- workbook.glossary_info.metadata_section.attributes
25
+ # pp workbook.glossary_info.metadata_section.structure
26
+ # pp workbook.glossary_info.metadata_section.attributes
27
27
 
28
- languages = {}
28
+ registries = {}
29
29
 
30
30
  workbook.languages_supported.map do |lang|
31
31
  puts "************** WORKING ON LANGUAGE (#{lang})"
32
32
  sheet = workbook.language_sheet(lang)
33
33
  termsec = sheet.terms_section
34
- languages[sheet.language_code] = termsec.terms
34
+ mdsec = sheet.metadata_section
35
+ registries[sheet.language_code] = {
36
+ metadata: mdsec.to_hash["metadata"],
37
+ terms: termsec.terms
38
+ }
35
39
  end
36
40
 
41
+ output_dir = Dir.pwd
42
+
43
+ metadata = {
44
+ 'header' => workbook.glossary_info.metadata_section.to_hash["metadata"]
45
+ }
46
+ # Write registry metadata
47
+ metadata['languages'] = registries.inject({}) do |acc, (lang, data)|
48
+ acc.merge({lang => data[:metadata]})
49
+ end
50
+
51
+ File.open(File.join(output_dir, Pathname.new(filepath).basename.sub_ext(".meta.yaml")),"w") do |file|
52
+ file.write(metadata.to_yaml)
53
+ end
54
+
55
+
37
56
  collection = Tc211::Termbase::ConceptCollection.new
38
57
 
39
- languages.each_pair do |lang, terms|
40
- terms.each do |term|
58
+ registries.each_pair do |lang, data|
59
+ data[:terms].each do |term|
41
60
  collection.add_term(term)
42
61
  end
43
62
  end
44
63
 
45
64
  # collection[1206].inspect
46
65
 
47
- output_dir = Dir.pwd
48
-
49
66
  collection.to_file(File.join(output_dir, Pathname.new(filepath).basename.sub_ext(".yaml")))
50
67
 
51
68
  collection_output_dir = File.join(output_dir, "concepts")
@@ -58,20 +58,38 @@ class MetadataSection < SheetSection
58
58
  GLOSSARY_ROW_KEY_MAP
59
59
  end
60
60
 
61
+ def clean_key(k)
62
+ k.strip.
63
+ downcase.
64
+ gsub(/[()]/,"").
65
+ gsub(' ', '-')
66
+ end
67
+
68
+ def clean_value(v)
69
+ return nil if v.nil?
70
+
71
+ case v
72
+ when String
73
+ v.strip
74
+ else
75
+ v
76
+ end
77
+ end
78
+
61
79
  def parse_row(row)
62
80
  return nil if row.empty?
63
81
  attribute = {}
64
82
 
65
83
  structure.each_pair do |key, value|
66
84
  # puts"#{key}, #{value}, #{row[key]}"
67
- attribute_key = value
68
- attribute_value = row[key]
85
+ attribute_key = clean_key(value)
86
+ attribute_value = clean_value(row[key])
69
87
  next if attribute_value.nil?
70
88
  attribute[attribute_key] = attribute_value
71
89
  end
72
90
 
73
91
  # TODO: "Chinese" name is empty!
74
- key = (attribute["name"] || "(empty)").downcase.split(" ").join("-")
92
+ key = clean_key(attribute["name"] || "(empty)")
75
93
 
76
94
  { key => attribute }
77
95
  end
@@ -87,9 +105,30 @@ class MetadataSection < SheetSection
87
105
  @attributes
88
106
  end
89
107
 
108
+ def fields
109
+
110
+ # "operating-language-country"=>
111
+ # {"name"=>"Operating Language Country",
112
+ # "value"=>"410",
113
+ # "datatype"=>"Country Code",
114
+ # "special-instruction"=>
115
+ # "ftp.ics.uci.edu/pub/ietf/http/related/iso3166.txt",
116
+ # "19135-class-attribute"=>"RE_Register.operatingLanguage",
117
+ # "value-domain"=>
118
+ # "<<Data Type>>RE_Locale.country \n" +
119
+ # "[ISO 3166-1 3-character numeric country code]"},
120
+ #
121
+
122
+ attributes.inject({}) do |acc, (k, v)|
123
+ acc.merge({
124
+ k => v["value"]
125
+ })
126
+ end
127
+ end
128
+
90
129
  def to_hash
91
130
  {
92
- "metadata" => attributes
131
+ "metadata" => fields
93
132
  }
94
133
  end
95
134
 
@@ -105,12 +105,12 @@ class Term
105
105
 
106
106
  def add_example(example)
107
107
  c = clean_prefixed_string(example, EXAMPLE_PREFIXES)
108
- @examples << c
108
+ @examples << c unless c.empty?
109
109
  end
110
110
 
111
111
  def add_note(note)
112
112
  c = clean_prefixed_string(note, NOTE_PREFIXES)
113
- @notes << c
113
+ @notes << c unless c.empty?
114
114
  end
115
115
 
116
116
  def clean_prefixed_string(string, criterion_map)
@@ -52,6 +52,14 @@ class TerminologySheet
52
52
  end
53
53
  end
54
54
 
55
+ def metadata_section
56
+ sections
57
+
58
+ sections.detect do |section|
59
+ section.is_a?(MetadataSection)
60
+ end
61
+ end
62
+
55
63
  def sections
56
64
  return @sections if @sections
57
65
 
@@ -1,5 +1,5 @@
1
1
  module Tc211
2
2
  module Termbase
3
- VERSION = "0.1.10"
3
+ VERSION = "0.1.11"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tc211-termbase
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.10
4
+ version: 0.1.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-08-10 00:00:00.000000000 Z
11
+ date: 2019-08-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: iso-639