tibetan 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ae545936f34ae071778e0c393f2823541fd185a03891a05150e08e1093b82b4
4
- data.tar.gz: 121c95c185557a0411164b0ab5cbd742ddeec6a92d10af3935ae5b672c25a82d
3
+ metadata.gz: 97b9ceebfe79bfcae96bfa37793bddf7f2d860c219efcf7b0329c491e0882d4c
4
+ data.tar.gz: e3373daa2818419ec64cc69daf0b643f9631d311483e35a281609b495f6ce3d5
5
5
  SHA512:
6
- metadata.gz: 7c0d643ec58d358e566906907935392656091f537d07f6098877b2a38890a3300cda5ef76295ef6f19ca8e1bdeb23f4ed3900a3e057fc1ece165cda492ed4d86
7
- data.tar.gz: 851bae4b76b7b09312619683716e4356d857b20e76846aa1f42f394d8742fe05b684300b6e2ca5ecb176dc9590ea3787a1f3a95c5475f269e8d24f6bdcf78d0c
6
+ metadata.gz: 6dddc1dde6d9a3db2397dcf0429d32f114e3515c07c5bb67bccabba29de5febe4faa6f06afa0af3880f298c0064924c629fc55b67e4881ee8b451f15dad1cca3
7
+ data.tar.gz: f98150d13db94c934d0a39bb288c5d8532735595d43ecf75a9d3b17f96e8320e83df9ff30e0e78abcece8c98ae9dd371b6ea9b259c2338dfa4ea8ae660d80f4e
data/Gemfile.lock CHANGED
@@ -1,22 +1,26 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- tibetan (0.1.3)
4
+ tibetan (0.1.5)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
- minitest (5.25.5)
10
- rake (13.2.1)
9
+ drb (2.2.3)
10
+ minitest (6.0.2)
11
+ drb (~> 2.0)
12
+ prism (~> 1.5)
13
+ prism (1.9.0)
14
+ rake (13.3.1)
11
15
 
12
16
  PLATFORMS
13
17
  ruby
14
18
 
15
19
  DEPENDENCIES
16
- bundler (~> 2.6)
17
- minitest (~> 5.25)
18
- rake (~> 13.2)
20
+ bundler (>= 2.6)
21
+ minitest (~> 6.0)
22
+ rake (~> 13.3)
19
23
  tibetan!
20
24
 
21
25
  BUNDLED WITH
22
- 2.6.7
26
+ 4.0.9
data/README.md CHANGED
@@ -40,6 +40,9 @@ Tibetan.t("༪") # => "0.5"
40
40
  ```
41
41
 
42
42
  ## References
43
+ https://rywiki.tsadra.org/index.php/Wylie
44
+ https://sorig.info/images/my-pages/about/transliteration-en.pdf
45
+ https://resources.christian-steinert.de/download/WylieTransliteration.pdf
43
46
  http://www.thlib.org/reference/transliteration/#!essay=/thl/ewts/meta/
44
47
  http://www.thlib.org/reference/transliteration/teachingewts.pdf
45
48
  http://www.thlib.org/reference/transliteration/phconverter.php
@@ -1,3 +1,3 @@
1
1
  module Tibetan
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.5"
3
3
  end
data/lib/tibetan.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "tibetan/version"
2
4
 
3
5
  module Tibetan
@@ -107,6 +109,12 @@ module Tibetan
107
109
  "ྷ" => "h",
108
110
  "ཨ" => "a",
109
111
  "ྸ" => "a",
112
+ # Retroflex Consonants
113
+ "ཊ" => "T", "ྚ" => "T",
114
+ "ཋ" => "Th", "ྛ" => "Th",
115
+ "ཌ" => "Da", "ྜ" => "D",
116
+ "ཎ" => "N", "ྞ" => "N",
117
+ "ཥ" => "S", "ྵ" => "S",
110
118
  # Vowels
111
119
  "ི" => "i",
112
120
  "ུ" => "u",
@@ -124,25 +132,37 @@ module Tibetan
124
132
  "ཹ" => "ḹ",
125
133
  }
126
134
 
127
- CONSONANTS = %w(ཀ ཁ ག ང ཅ ཆ ཇ ཉ ཏ ཐ ད ན པ ཕ བ མ ཙ ཚ ཛ ཝ ཞ ཟ འ ཡ ར ལ ཤ ས ཧ)
128
- SUBSCRIPTS = %w(ྐ ྑ ྒ ྔ ྕ ྖ ྗ ྙ ྟ ྠ ྡ ྣ ྤ ྥ ྦ ྨ ྩ ྪ ྫ ྭ ྮ ྯ ྰ ྱ ྲ ླ ྴ ྶ ྷ)
135
+ CONSONANTS = %w(ཀ ཁ ག ང ཅ ཆ ཇ ཉ ཏ ཐ ད ན པ ཕ བ མ ཙ ཚ ཛ ཝ ཞ ཟ འ ཡ ར ལ ཤ ས ཧ ཊ ཋ ཌ ཎ ཥ)
136
+ SUBSCRIPTS = %w(ྐ ྑ ྒ ྔ ྕ ྖ ྗ ྙ ྟ ྠ ྡ ྣ ྤ ྥ ྦ ྨ ྩ ྪ ྫ ྭ ྮ ྯ ྰ ྱ ྲ ླ ྴ ྶ ྷ ྚ ྛ ྜ ྞ ྵ)
129
137
  VOWELS = %w(ྸ ི ུ ེ ོ a)
130
138
  SEP = "་"
131
139
  DEFAULT_VOWEL = "a"
132
140
 
133
141
  class << self
134
142
  def transliterate(string="", to=:tibetan)
135
- string = string.to_s
143
+ string = string.to_s.dup
136
144
 
137
145
  # Split long phrase into small parts and transliterate separately
138
- if string.split(SEP).size > 1
139
- string = string.split(SEP).map do |str|
146
+ # Split by anything that isn't a Tibetan consonant, subscript, or vowel
147
+ parts = string.split(/([^#{CONSONANTS.join}#{SUBSCRIPTS.join}#{VOWELS.join}]+)/).reject(&:empty?)
148
+ if parts.size > 1
149
+ res = parts.map do |str|
140
150
  transliterate(str)
141
- end.join(SEP)
151
+ end.join
152
+ # During mapping, trailing tseks become spaces. This cleans up erroneous spaces
153
+ # before closing punctuation (e.g. «pe » -> «pe») and drops trailing spaces.
154
+ res.gsub!(/ +([»\]\)]|\Z)/, '\1')
155
+ return res
142
156
  end
143
-
157
+
158
+ # Implicit vowel 'a' before 'a-chung preceded by a consonant/subscript
159
+ string.gsub!(/([#{CONSONANTS.join}#{SUBSCRIPTS.join}])འ/, '\1aའ')
160
+
144
161
  insert_default_vowel!(string)
145
162
 
163
+ # Exception: distinguish prefix 'g' and root 'y' (g.y) from root 'g' and subjoined 'y' (gy)
164
+ string.gsub!("གཡ", "ག.ཡ")
165
+
146
166
  character_table = Module.const_get(to.to_s.capitalize)::CHARACTER_TABLE
147
167
  string.to_s.gsub(/#{Regexp.union(character_table.keys).source}/i, character_table)
148
168
  end
@@ -151,13 +171,30 @@ module Tibetan
151
171
  def insert_default_vowel!(string="")
152
172
  # 1. after subscript
153
173
  if (string.chars & VOWELS).empty?
154
- index = string.rindex(/#{SUBSCRIPTS.join('|')}/)
155
- string = string.insert(index+1, DEFAULT_VOWEL) unless index.nil?
156
- end
157
- # 2. after consonant, if not added in 1st step
158
- if (string.chars & VOWELS).empty? && (string.chars & CONSONANTS).any?
159
- index = string.size > 2 ? 1 : 0
160
- string = string.insert(index+1, DEFAULT_VOWEL) unless index.nil?
174
+ if (sub_idx = string.rindex(/#{SUBSCRIPTS.join('|')}/))
175
+ string = string.insert(sub_idx+1, DEFAULT_VOWEL)
176
+ elsif (string.chars & CONSONANTS).any?
177
+ # 2. after consonant, if not added in 1st step
178
+ # Count ONLY Tibetan consonants/subscripts to identify the root letter.
179
+ # We ignore inline punctuation (like ») to avoid inflating the string size
180
+ # and placing the implicit 'a' in the wrong position.
181
+ tibetan_chars_count = string.chars.count { |c| CONSONANTS.include?(c) || SUBSCRIPTS.include?(c) }
182
+ root_idx = tibetan_chars_count > 2 ? 1 : 0
183
+
184
+ # Find the actual string index corresponding to the root consonant
185
+ current = -1
186
+ actual_index = -1
187
+ string.chars.each_with_index do |c, i|
188
+ if CONSONANTS.include?(c) || SUBSCRIPTS.include?(c)
189
+ current += 1
190
+ if current == root_idx
191
+ actual_index = i
192
+ break
193
+ end
194
+ end
195
+ end
196
+ string = string.insert(actual_index+1, DEFAULT_VOWEL) if actual_index >= 0
197
+ end
161
198
  end
162
199
  end
163
200
  end
data/tibetan.gemspec CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
22
22
  spec.require_paths = ["lib"]
23
23
 
24
- spec.add_development_dependency "bundler", "~> 2.6"
25
- spec.add_development_dependency "rake", "~> 13.2"
26
- spec.add_development_dependency "minitest", "~> 5.25"
24
+ spec.add_development_dependency "bundler", ">= 2.6"
25
+ spec.add_development_dependency "rake", "~> 13.3"
26
+ spec.add_development_dependency "minitest", "~> 6.0"
27
27
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tibetan
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - rovetz
@@ -13,14 +13,14 @@ dependencies:
13
13
  name: bundler
14
14
  requirement: !ruby/object:Gem::Requirement
15
15
  requirements:
16
- - - "~>"
16
+ - - ">="
17
17
  - !ruby/object:Gem::Version
18
18
  version: '2.6'
19
19
  type: :development
20
20
  prerelease: false
21
21
  version_requirements: !ruby/object:Gem::Requirement
22
22
  requirements:
23
- - - "~>"
23
+ - - ">="
24
24
  - !ruby/object:Gem::Version
25
25
  version: '2.6'
26
26
  - !ruby/object:Gem::Dependency
@@ -29,28 +29,28 @@ dependencies:
29
29
  requirements:
30
30
  - - "~>"
31
31
  - !ruby/object:Gem::Version
32
- version: '13.2'
32
+ version: '13.3'
33
33
  type: :development
34
34
  prerelease: false
35
35
  version_requirements: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
- version: '13.2'
39
+ version: '13.3'
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: minitest
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '5.25'
46
+ version: '6.0'
47
47
  type: :development
48
48
  prerelease: false
49
49
  version_requirements: !ruby/object:Gem::Requirement
50
50
  requirements:
51
51
  - - "~>"
52
52
  - !ruby/object:Gem::Version
53
- version: '5.25'
53
+ version: '6.0'
54
54
  description: Romanization (transliteration) of Tibetan. Converts Tibetan text to the
55
55
  Roman (Latin) script using THL EWTS (Extended Wylie Transliteration Scheme)
56
56
  email:
@@ -91,7 +91,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
91
91
  - !ruby/object:Gem::Version
92
92
  version: '0'
93
93
  requirements: []
94
- rubygems_version: 3.6.7
94
+ rubygems_version: 4.0.6
95
95
  specification_version: 4
96
96
  summary: Romanization (transliteration) of Tibetan
97
97
  test_files: []