thai_romanize 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4119c9e70af6d5ef11b5288918e847d5e86bb2c6fb3cc3b2f029bada57a07f5c
4
- data.tar.gz: 30675cec29f5e8e47d7273cc21821ee8eb8bbaf29b86e2ff0898ceffdc78e864
3
+ metadata.gz: 184a6f358fbcbf8cc9fe0d67a9ad311d5004daa34a0444ed256a22423c297b74
4
+ data.tar.gz: c1793fa4fbe038d43298bba837508060044d53a017af002d89cbed41e05ad3d0
5
5
  SHA512:
6
- metadata.gz: 72b06ca00efb7b0a52d482b77bab6637b53f67524cb266435237e7f6d0517d2cf2a68bb409a3e7a015deeae0bacdad3f3d0bad6970ee0a43cb1a0b7069365865
7
- data.tar.gz: beccb03e2e83b737d7c59e0a574b45bbfe85be7525ef461e96519233565d6f2b47123b4abcd0df0a912d27b607370e471190bf5ef7abbe1bcb32f73e094ba447
6
+ metadata.gz: f7de5f696e935eb2a855e586c3b406ef31a820f0bce4295f5a6d02a53b0b1edc54caecfa04fcceb238c6826f24a40f24db0047edaae023be4a39498b7c0497de
7
+ data.tar.gz: a5fc9d897a724778b4e916bb8a50755a9d4b4baa5327c1ca4c89369db078dfa72e8f4ceceec926521b4af3b4dd461973b7b7415f37e00eeb4b3506f55652febc
data/lib/thai_romanize.rb CHANGED
@@ -122,55 +122,30 @@ $ฤ,\\1ri"""
122
122
  VOWELS.each { word.gsub!(_1, _2) }
123
123
  return word
124
124
  end
125
-
125
+
126
126
  def self.replace_consonants(word, consonants)
127
127
  return word unless consonants
128
128
  return word.gsub(consonants[0], CONSONANTS[consonants[0]][0]) if consonants.length == 1
129
- i = 0
130
- len_cons = consonants.length
131
- while i < len_cons
132
- if i == 0
133
- if consonants[0] == "ห"
134
- word.gsub!(consonants[0], "")
135
- consonants.delete_at(0)
136
- len_cons -= 1
137
- else
138
- word.gsub!(consonants[0], CONSONANTS[consonants[0]][0])
139
- i += 1
140
- end
141
- elsif consonants[i] == "ร" and i == word.length and word[i - 1] == ""
142
- word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
143
- elsif consonants[i] == "ร" and i < word.length
144
- if i + 1 == word.length and word[i] == "ร"
145
- word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
146
- elsif word[i] == "ร" and i + 1 < word.length
147
- if word[i + 1] == "ร"
148
- word = word.chars
149
- word.delete_at(i + 1)
150
- if i + 2 == len_cons
151
- word[i] = "an"
152
- else
153
- word[i] = "a"
154
- end
155
- word = word.join("")
156
- i += 1
157
- elsif word[i] == "ร"
158
- word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
159
- i += 1
160
- else
161
- word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
162
- i += 1
163
- end
164
- end
165
- elsif word[i] == "ร"
166
- word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
167
- i += 1
129
+ consonants.reduce({rom: "", th: word}) do |w, consonant|
130
+ non_thai = w[:th].match(/^[^ก-์]+/)
131
+ if non_thai
132
+ w[:rom] += non_thai.to_s
133
+ w[:th] = w[:th][non_thai.to_s.length..-1]
134
+ end
135
+ if w[:skip]
136
+ {rom: w[:rom], th: w[:th]}
137
+ elsif w[:rom] == "" and w[:th] == "ห"
138
+ {rom: "", th: w[:th][1..-1]}
139
+ elsif w[:rom] == ""
140
+ {rom: CONSONANTS[consonant][0], th: w[:th][consonant.length..-1]}
141
+ elsif consonant == "ร" and w[:th] == "รร"
142
+ {rom: w[:rom] + "an", th: w[:th][2..-1], skip: true}
143
+ elsif consonant == "ร" and w[:th][0..1] == "รร"
144
+ {rom: w[:rom] + "a", th: w[:th][2..-1], skip: true}
168
145
  else
169
- word.gsub!(consonants[i], CONSONANTS[consonants[i]][1])
170
- i += 1
146
+ {rom: w[:rom] + CONSONANTS[consonant][1], th: w[:th][consonant.length..-1]}
171
147
  end
172
- end
173
- return word
148
+ end[:rom]
174
149
  end
175
150
 
176
151
  def self.romanize_word(word)
@@ -187,8 +162,8 @@ $ฤ,\\1ri"""
187
162
 
188
163
  WORDCUT = WordcutA::Wordcut.new(WordcutA::DEFAULT_THAI_DICT_PATH)
189
164
 
190
- def self.romanize(text)
191
- WORDCUT.into_strings(text).map { romanize_word _1 }.join(" ")
165
+ def self.romanize(text, delim = "")
166
+ WORDCUT.into_strings(text).map { romanize_word _1 }.join(delim)
192
167
  end
193
168
  end
194
169
 
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'thai_romanize'
3
- s.version = '0.0.1'
3
+ s.version = '0.0.3'
4
4
  s.authors = ['Vee Satayamas']
5
5
  s.email = ['5ssgdxltv@relay.firefox.com']
6
6
  s.licenses = ['LGPL-3.0']
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: thai_romanize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vee Satayamas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-04-25 00:00:00.000000000 Z
11
+ date: 2021-04-26 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A Thai romanization function ported from PyThaiNLP
14
14
  email: