thai_romanize 0.0.1 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/thai_romanize.rb +21 -46
- data/thai_romanize.gemspec +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 184a6f358fbcbf8cc9fe0d67a9ad311d5004daa34a0444ed256a22423c297b74
|
4
|
+
data.tar.gz: c1793fa4fbe038d43298bba837508060044d53a017af002d89cbed41e05ad3d0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f7de5f696e935eb2a855e586c3b406ef31a820f0bce4295f5a6d02a53b0b1edc54caecfa04fcceb238c6826f24a40f24db0047edaae023be4a39498b7c0497de
|
7
|
+
data.tar.gz: a5fc9d897a724778b4e916bb8a50755a9d4b4baa5327c1ca4c89369db078dfa72e8f4ceceec926521b4af3b4dd461973b7b7415f37e00eeb4b3506f55652febc
|
data/lib/thai_romanize.rb
CHANGED
@@ -122,55 +122,30 @@ $ฤ,\\1ri"""
|
|
122
122
|
VOWELS.each { word.gsub!(_1, _2) }
|
123
123
|
return word
|
124
124
|
end
|
125
|
-
|
125
|
+
|
126
126
|
def self.replace_consonants(word, consonants)
|
127
127
|
return word unless consonants
|
128
128
|
return word.gsub(consonants[0], CONSONANTS[consonants[0]][0]) if consonants.length == 1
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
elsif
|
142
|
-
|
143
|
-
elsif
|
144
|
-
|
145
|
-
word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
|
146
|
-
elsif word[i] == "ร" and i + 1 < word.length
|
147
|
-
if word[i + 1] == "ร"
|
148
|
-
word = word.chars
|
149
|
-
word.delete_at(i + 1)
|
150
|
-
if i + 2 == len_cons
|
151
|
-
word[i] = "an"
|
152
|
-
else
|
153
|
-
word[i] = "a"
|
154
|
-
end
|
155
|
-
word = word.join("")
|
156
|
-
i += 1
|
157
|
-
elsif word[i] == "ร"
|
158
|
-
word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
|
159
|
-
i += 1
|
160
|
-
else
|
161
|
-
word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
|
162
|
-
i += 1
|
163
|
-
end
|
164
|
-
end
|
165
|
-
elsif word[i] == "ร"
|
166
|
-
word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
|
167
|
-
i += 1
|
129
|
+
consonants.reduce({rom: "", th: word}) do |w, consonant|
|
130
|
+
non_thai = w[:th].match(/^[^ก-์]+/)
|
131
|
+
if non_thai
|
132
|
+
w[:rom] += non_thai.to_s
|
133
|
+
w[:th] = w[:th][non_thai.to_s.length..-1]
|
134
|
+
end
|
135
|
+
if w[:skip]
|
136
|
+
{rom: w[:rom], th: w[:th]}
|
137
|
+
elsif w[:rom] == "" and w[:th] == "ห"
|
138
|
+
{rom: "", th: w[:th][1..-1]}
|
139
|
+
elsif w[:rom] == ""
|
140
|
+
{rom: CONSONANTS[consonant][0], th: w[:th][consonant.length..-1]}
|
141
|
+
elsif consonant == "ร" and w[:th] == "รร"
|
142
|
+
{rom: w[:rom] + "an", th: w[:th][2..-1], skip: true}
|
143
|
+
elsif consonant == "ร" and w[:th][0..1] == "รร"
|
144
|
+
{rom: w[:rom] + "a", th: w[:th][2..-1], skip: true}
|
168
145
|
else
|
169
|
-
|
170
|
-
i += 1
|
146
|
+
{rom: w[:rom] + CONSONANTS[consonant][1], th: w[:th][consonant.length..-1]}
|
171
147
|
end
|
172
|
-
end
|
173
|
-
return word
|
148
|
+
end[:rom]
|
174
149
|
end
|
175
150
|
|
176
151
|
def self.romanize_word(word)
|
@@ -187,8 +162,8 @@ $ฤ,\\1ri"""
|
|
187
162
|
|
188
163
|
WORDCUT = WordcutA::Wordcut.new(WordcutA::DEFAULT_THAI_DICT_PATH)
|
189
164
|
|
190
|
-
def self.romanize(text)
|
191
|
-
WORDCUT.into_strings(text).map { romanize_word _1 }.join(
|
165
|
+
def self.romanize(text, delim = "")
|
166
|
+
WORDCUT.into_strings(text).map { romanize_word _1 }.join(delim)
|
192
167
|
end
|
193
168
|
end
|
194
169
|
|
data/thai_romanize.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: thai_romanize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vee Satayamas
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-04-
|
11
|
+
date: 2021-04-26 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A Thai romanization function ported from PyThaiNLP
|
14
14
|
email:
|