thai_romanize 0.0.1 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4119c9e70af6d5ef11b5288918e847d5e86bb2c6fb3cc3b2f029bada57a07f5c
4
- data.tar.gz: 30675cec29f5e8e47d7273cc21821ee8eb8bbaf29b86e2ff0898ceffdc78e864
3
+ metadata.gz: 184a6f358fbcbf8cc9fe0d67a9ad311d5004daa34a0444ed256a22423c297b74
4
+ data.tar.gz: c1793fa4fbe038d43298bba837508060044d53a017af002d89cbed41e05ad3d0
5
5
  SHA512:
6
- metadata.gz: 72b06ca00efb7b0a52d482b77bab6637b53f67524cb266435237e7f6d0517d2cf2a68bb409a3e7a015deeae0bacdad3f3d0bad6970ee0a43cb1a0b7069365865
7
- data.tar.gz: beccb03e2e83b737d7c59e0a574b45bbfe85be7525ef461e96519233565d6f2b47123b4abcd0df0a912d27b607370e471190bf5ef7abbe1bcb32f73e094ba447
6
+ metadata.gz: f7de5f696e935eb2a855e586c3b406ef31a820f0bce4295f5a6d02a53b0b1edc54caecfa04fcceb238c6826f24a40f24db0047edaae023be4a39498b7c0497de
7
+ data.tar.gz: a5fc9d897a724778b4e916bb8a50755a9d4b4baa5327c1ca4c89369db078dfa72e8f4ceceec926521b4af3b4dd461973b7b7415f37e00eeb4b3506f55652febc
data/lib/thai_romanize.rb CHANGED
@@ -122,55 +122,30 @@ $ฤ,\\1ri"""
122
122
  VOWELS.each { word.gsub!(_1, _2) }
123
123
  return word
124
124
  end
125
-
125
+
126
126
  def self.replace_consonants(word, consonants)
127
127
  return word unless consonants
128
128
  return word.gsub(consonants[0], CONSONANTS[consonants[0]][0]) if consonants.length == 1
129
- i = 0
130
- len_cons = consonants.length
131
- while i < len_cons
132
- if i == 0
133
- if consonants[0] == "ห"
134
- word.gsub!(consonants[0], "")
135
- consonants.delete_at(0)
136
- len_cons -= 1
137
- else
138
- word.gsub!(consonants[0], CONSONANTS[consonants[0]][0])
139
- i += 1
140
- end
141
- elsif consonants[i] == "ร" and i == word.length and word[i - 1] == ""
142
- word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
143
- elsif consonants[i] == "ร" and i < word.length
144
- if i + 1 == word.length and word[i] == "ร"
145
- word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
146
- elsif word[i] == "ร" and i + 1 < word.length
147
- if word[i + 1] == "ร"
148
- word = word.chars
149
- word.delete_at(i + 1)
150
- if i + 2 == len_cons
151
- word[i] = "an"
152
- else
153
- word[i] = "a"
154
- end
155
- word = word.join("")
156
- i += 1
157
- elsif word[i] == "ร"
158
- word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
159
- i += 1
160
- else
161
- word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
162
- i += 1
163
- end
164
- end
165
- elsif word[i] == "ร"
166
- word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
167
- i += 1
129
+ consonants.reduce({rom: "", th: word}) do |w, consonant|
130
+ non_thai = w[:th].match(/^[^ก-์]+/)
131
+ if non_thai
132
+ w[:rom] += non_thai.to_s
133
+ w[:th] = w[:th][non_thai.to_s.length..-1]
134
+ end
135
+ if w[:skip]
136
+ {rom: w[:rom], th: w[:th]}
137
+ elsif w[:rom] == "" and w[:th] == "ห"
138
+ {rom: "", th: w[:th][1..-1]}
139
+ elsif w[:rom] == ""
140
+ {rom: CONSONANTS[consonant][0], th: w[:th][consonant.length..-1]}
141
+ elsif consonant == "ร" and w[:th] == "รร"
142
+ {rom: w[:rom] + "an", th: w[:th][2..-1], skip: true}
143
+ elsif consonant == "ร" and w[:th][0..1] == "รร"
144
+ {rom: w[:rom] + "a", th: w[:th][2..-1], skip: true}
168
145
  else
169
- word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
170
- i += 1
146
+ {rom: w[:rom] + CONSONANTS[consonant][1], th: w[:th][consonant.length..-1]}
171
147
  end
172
- end
173
- return word
148
+ end[:rom]
174
149
  end
175
150
 
176
151
  def self.romanize_word(word)
@@ -187,8 +162,8 @@ $ฤ,\\1ri"""
187
162
 
188
163
  WORDCUT = WordcutA::Wordcut.new(WordcutA::DEFAULT_THAI_DICT_PATH)
189
164
 
190
- def self.romanize(text)
191
- WORDCUT.into_strings(text).map { romanize_word _1 }.join(" ")
165
+ def self.romanize(text, delim = "")
166
+ WORDCUT.into_strings(text).map { romanize_word _1 }.join(delim)
192
167
  end
193
168
  end
194
169
 
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'thai_romanize'
3
- s.version = '0.0.1'
3
+ s.version = '0.0.3'
4
4
  s.authors = ['Vee Satayamas']
5
5
  s.email = ['5ssgdxltv@relay.firefox.com']
6
6
  s.licenses = ['LGPL-3.0']
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: thai_romanize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vee Satayamas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-04-25 00:00:00.000000000 Z
11
+ date: 2021-04-26 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A Thai romanization function ported from PyThaiNLP
14
14
  email: