tibetan 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +11 -7
- data/README.md +3 -0
- data/lib/tibetan/version.rb +1 -1
- data/lib/tibetan.rb +51 -14
- data/tibetan.gemspec +3 -3
- metadata +8 -8
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 97b9ceebfe79bfcae96bfa37793bddf7f2d860c219efcf7b0329c491e0882d4c
|
|
4
|
+
data.tar.gz: e3373daa2818419ec64cc69daf0b643f9631d311483e35a281609b495f6ce3d5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6dddc1dde6d9a3db2397dcf0429d32f114e3515c07c5bb67bccabba29de5febe4faa6f06afa0af3880f298c0064924c629fc55b67e4881ee8b451f15dad1cca3
|
|
7
|
+
data.tar.gz: f98150d13db94c934d0a39bb288c5d8532735595d43ecf75a9d3b17f96e8320e83df9ff30e0e78abcece8c98ae9dd371b6ea9b259c2338dfa4ea8ae660d80f4e
|
data/Gemfile.lock
CHANGED
|
@@ -1,22 +1,26 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
tibetan (0.1.4)
|
|
4
|
+
tibetan (0.1.5)
|
|
5
5
|
|
|
6
6
|
GEM
|
|
7
7
|
remote: https://rubygems.org/
|
|
8
8
|
specs:
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
drb (2.2.3)
|
|
10
|
+
minitest (6.0.2)
|
|
11
|
+
drb (~> 2.0)
|
|
12
|
+
prism (~> 1.5)
|
|
13
|
+
prism (1.9.0)
|
|
14
|
+
rake (13.3.1)
|
|
11
15
|
|
|
12
16
|
PLATFORMS
|
|
13
17
|
ruby
|
|
14
18
|
|
|
15
19
|
DEPENDENCIES
|
|
16
|
-
bundler (
|
|
17
|
-
minitest (~>
|
|
18
|
-
rake (~> 13.
|
|
20
|
+
bundler (>= 2.6)
|
|
21
|
+
minitest (~> 6.0)
|
|
22
|
+
rake (~> 13.3)
|
|
19
23
|
tibetan!
|
|
20
24
|
|
|
21
25
|
BUNDLED WITH
|
|
22
|
-
|
|
26
|
+
4.0.9
|
data/README.md
CHANGED
|
@@ -40,6 +40,9 @@ Tibetan.t("༪") # => "0.5"
|
|
|
40
40
|
```
|
|
41
41
|
|
|
42
42
|
## References
|
|
43
|
+
https://rywiki.tsadra.org/index.php/Wylie
|
|
44
|
+
https://sorig.info/images/my-pages/about/transliteration-en.pdf
|
|
45
|
+
https://resources.christian-steinert.de/download/WylieTransliteration.pdf
|
|
43
46
|
http://www.thlib.org/reference/transliteration/#!essay=/thl/ewts/meta/
|
|
44
47
|
http://www.thlib.org/reference/transliteration/teachingewts.pdf
|
|
45
48
|
http://www.thlib.org/reference/transliteration/phconverter.php
|
data/lib/tibetan/version.rb
CHANGED
data/lib/tibetan.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require "tibetan/version"
|
|
2
4
|
|
|
3
5
|
module Tibetan
|
|
@@ -107,6 +109,12 @@ module Tibetan
|
|
|
107
109
|
"ྷ" => "h",
|
|
108
110
|
"ཨ" => "a",
|
|
109
111
|
"ྸ" => "a",
|
|
112
|
+
# Retroflex Consonants
|
|
113
|
+
"ཊ" => "T", "ྚ" => "T",
|
|
114
|
+
"ཋ" => "Th", "ྛ" => "Th",
|
|
115
|
+
"ཌ" => "Da", "ྜ" => "D",
|
|
116
|
+
"ཎ" => "N", "ྞ" => "N",
|
|
117
|
+
"ཥ" => "S", "ྵ" => "S",
|
|
110
118
|
# Vowels
|
|
111
119
|
"ི" => "i",
|
|
112
120
|
"ུ" => "u",
|
|
@@ -124,25 +132,37 @@ module Tibetan
|
|
|
124
132
|
"ཹ" => "ḹ",
|
|
125
133
|
}
|
|
126
134
|
|
|
127
|
-
CONSONANTS = %w(ཀ ཁ ག ང ཅ ཆ ཇ ཉ ཏ ཐ ད ན པ ཕ བ མ ཙ ཚ ཛ ཝ ཞ ཟ འ ཡ ར ལ ཤ ས ཧ)
|
|
128
|
-
SUBSCRIPTS = %w(ྐ ྑ ྒ ྔ ྕ ྖ ྗ ྙ ྟ ྠ ྡ ྣ ྤ ྥ ྦ ྨ ྩ ྪ ྫ ྭ ྮ ྯ ྰ ྱ ྲ ླ ྴ ྶ ྷ)
|
|
135
|
+
CONSONANTS = %w(ཀ ཁ ག ང ཅ ཆ ཇ ཉ ཏ ཐ ད ན པ ཕ བ མ ཙ ཚ ཛ ཝ ཞ ཟ འ ཡ ར ལ ཤ ས ཧ ཊ ཋ ཌ ཎ ཥ)
|
|
136
|
+
SUBSCRIPTS = %w(ྐ ྑ ྒ ྔ ྕ ྖ ྗ ྙ ྟ ྠ ྡ ྣ ྤ ྥ ྦ ྨ ྩ ྪ ྫ ྭ ྮ ྯ ྰ ྱ ྲ ླ ྴ ྶ ྷ ྚ ྛ ྜ ྞ ྵ)
|
|
129
137
|
VOWELS = %w(ྸ ི ུ ེ ོ a)
|
|
130
138
|
SEP = "་"
|
|
131
139
|
DEFAULT_VOWEL = "a"
|
|
132
140
|
|
|
133
141
|
class << self
|
|
134
142
|
def transliterate(string="", to=:tibetan)
|
|
135
|
-
string = string.to_s
|
|
143
|
+
string = string.to_s.dup
|
|
136
144
|
|
|
137
145
|
# Split long phrase into small parts and transliterate separately
|
|
138
|
-
|
|
139
|
-
|
|
146
|
+
# Split by anything that isn't a Tibetan consonant, subscript, or vowel
|
|
147
|
+
parts = string.split(/([^#{CONSONANTS.join}#{SUBSCRIPTS.join}#{VOWELS.join}]+)/).reject(&:empty?)
|
|
148
|
+
if parts.size > 1
|
|
149
|
+
res = parts.map do |str|
|
|
140
150
|
transliterate(str)
|
|
141
|
-
end.join
|
|
151
|
+
end.join
|
|
152
|
+
# During mapping, trailing tseks become spaces. This cleans up erroneous spaces
|
|
153
|
+
# before closing punctuation (e.g. «pe » -> «pe») and drops trailing spaces.
|
|
154
|
+
res.gsub!(/ +([»\]\)]|\Z)/, '\1')
|
|
155
|
+
return res
|
|
142
156
|
end
|
|
143
|
-
|
|
157
|
+
|
|
158
|
+
# Implicit vowel 'a' before 'a-chung preceded by a consonant/subscript
|
|
159
|
+
string.gsub!(/([#{CONSONANTS.join}#{SUBSCRIPTS.join}])འ/, '\1aའ')
|
|
160
|
+
|
|
144
161
|
insert_default_vowel!(string)
|
|
145
162
|
|
|
163
|
+
# Exception: distinguish prefix 'g' and root 'y' (g.y) from root 'g' and subjoined 'y' (gy)
|
|
164
|
+
string.gsub!("གཡ", "ག.ཡ")
|
|
165
|
+
|
|
146
166
|
character_table = Module.const_get(to.to_s.capitalize)::CHARACTER_TABLE
|
|
147
167
|
string.to_s.gsub(/#{Regexp.union(character_table.keys).source}/i, character_table)
|
|
148
168
|
end
|
|
@@ -151,13 +171,30 @@ module Tibetan
|
|
|
151
171
|
def insert_default_vowel!(string="")
|
|
152
172
|
# 1. after subscript
|
|
153
173
|
if (string.chars & VOWELS).empty?
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
174
|
+
if (sub_idx = string.rindex(/#{SUBSCRIPTS.join('|')}/))
|
|
175
|
+
string = string.insert(sub_idx+1, DEFAULT_VOWEL)
|
|
176
|
+
elsif (string.chars & CONSONANTS).any?
|
|
177
|
+
# 2. after consonant, if not added in 1st step
|
|
178
|
+
# Count ONLY Tibetan consonants/subscripts to identify the root letter.
|
|
179
|
+
# We ignore inline punctuation (like ») to avoid inflating the string size
|
|
180
|
+
# and placing the implicit 'a' in the wrong position.
|
|
181
|
+
tibetan_chars_count = string.chars.count { |c| CONSONANTS.include?(c) || SUBSCRIPTS.include?(c) }
|
|
182
|
+
root_idx = tibetan_chars_count > 2 ? 1 : 0
|
|
183
|
+
|
|
184
|
+
# Find the actual string index corresponding to the root consonant
|
|
185
|
+
current = -1
|
|
186
|
+
actual_index = -1
|
|
187
|
+
string.chars.each_with_index do |c, i|
|
|
188
|
+
if CONSONANTS.include?(c) || SUBSCRIPTS.include?(c)
|
|
189
|
+
current += 1
|
|
190
|
+
if current == root_idx
|
|
191
|
+
actual_index = i
|
|
192
|
+
break
|
|
193
|
+
end
|
|
194
|
+
end
|
|
195
|
+
end
|
|
196
|
+
string = string.insert(actual_index+1, DEFAULT_VOWEL) if actual_index >= 0
|
|
197
|
+
end
|
|
161
198
|
end
|
|
162
199
|
end
|
|
163
200
|
end
|
data/tibetan.gemspec
CHANGED
|
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
|
|
|
21
21
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
22
22
|
spec.require_paths = ["lib"]
|
|
23
23
|
|
|
24
|
-
spec.add_development_dependency "bundler", "
|
|
25
|
-
spec.add_development_dependency "rake", "~> 13.
|
|
26
|
-
spec.add_development_dependency "minitest", "~>
|
|
24
|
+
spec.add_development_dependency "bundler", ">= 2.6"
|
|
25
|
+
spec.add_development_dependency "rake", "~> 13.3"
|
|
26
|
+
spec.add_development_dependency "minitest", "~> 6.0"
|
|
27
27
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tibetan
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.4
|
|
4
|
+
version: 0.1.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- rovetz
|
|
@@ -13,14 +13,14 @@ dependencies:
|
|
|
13
13
|
name: bundler
|
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
|
15
15
|
requirements:
|
|
16
|
-
- - "
|
|
16
|
+
- - ">="
|
|
17
17
|
- !ruby/object:Gem::Version
|
|
18
18
|
version: '2.6'
|
|
19
19
|
type: :development
|
|
20
20
|
prerelease: false
|
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
22
22
|
requirements:
|
|
23
|
-
- - "
|
|
23
|
+
- - ">="
|
|
24
24
|
- !ruby/object:Gem::Version
|
|
25
25
|
version: '2.6'
|
|
26
26
|
- !ruby/object:Gem::Dependency
|
|
@@ -29,28 +29,28 @@ dependencies:
|
|
|
29
29
|
requirements:
|
|
30
30
|
- - "~>"
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
|
-
version: '13.
|
|
32
|
+
version: '13.3'
|
|
33
33
|
type: :development
|
|
34
34
|
prerelease: false
|
|
35
35
|
version_requirements: !ruby/object:Gem::Requirement
|
|
36
36
|
requirements:
|
|
37
37
|
- - "~>"
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
|
-
version: '13.
|
|
39
|
+
version: '13.3'
|
|
40
40
|
- !ruby/object:Gem::Dependency
|
|
41
41
|
name: minitest
|
|
42
42
|
requirement: !ruby/object:Gem::Requirement
|
|
43
43
|
requirements:
|
|
44
44
|
- - "~>"
|
|
45
45
|
- !ruby/object:Gem::Version
|
|
46
|
-
version: '
|
|
46
|
+
version: '6.0'
|
|
47
47
|
type: :development
|
|
48
48
|
prerelease: false
|
|
49
49
|
version_requirements: !ruby/object:Gem::Requirement
|
|
50
50
|
requirements:
|
|
51
51
|
- - "~>"
|
|
52
52
|
- !ruby/object:Gem::Version
|
|
53
|
-
version: '
|
|
53
|
+
version: '6.0'
|
|
54
54
|
description: Romanization (transliteration) of Tibetan. Converts Tibetan text to the
|
|
55
55
|
Roman (Latin) script using THL EWTS (Extended Wylie Transliteration Scheme)
|
|
56
56
|
email:
|
|
@@ -91,7 +91,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
91
91
|
- !ruby/object:Gem::Version
|
|
92
92
|
version: '0'
|
|
93
93
|
requirements: []
|
|
94
|
-
rubygems_version:
|
|
94
|
+
rubygems_version: 4.0.6
|
|
95
95
|
specification_version: 4
|
|
96
96
|
summary: Romanization (transliteration) of Tibetan
|
|
97
97
|
test_files: []
|