phoonnx 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. phoonnx/__init__.py +0 -0
  2. phoonnx/config.py +490 -0
  3. phoonnx/locale/ca/phonetic_spellings.txt +2 -0
  4. phoonnx/locale/en/phonetic_spellings.txt +1 -0
  5. phoonnx/locale/gl/phonetic_spellings.txt +2 -0
  6. phoonnx/locale/pt/phonetic_spellings.txt +2 -0
  7. phoonnx/phoneme_ids.py +453 -0
  8. phoonnx/phonemizers/__init__.py +45 -0
  9. phoonnx/phonemizers/ar.py +42 -0
  10. phoonnx/phonemizers/base.py +216 -0
  11. phoonnx/phonemizers/en.py +250 -0
  12. phoonnx/phonemizers/fa.py +46 -0
  13. phoonnx/phonemizers/gl.py +142 -0
  14. phoonnx/phonemizers/he.py +67 -0
  15. phoonnx/phonemizers/ja.py +119 -0
  16. phoonnx/phonemizers/ko.py +97 -0
  17. phoonnx/phonemizers/mul.py +606 -0
  18. phoonnx/phonemizers/vi.py +44 -0
  19. phoonnx/phonemizers/zh.py +308 -0
  20. phoonnx/thirdparty/__init__.py +0 -0
  21. phoonnx/thirdparty/arpa2ipa.py +249 -0
  22. phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
  23. phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
  24. phoonnx/thirdparty/hangul2ipa.py +783 -0
  25. phoonnx/thirdparty/ko_tables/aspiration.csv +20 -0
  26. phoonnx/thirdparty/ko_tables/assimilation.csv +31 -0
  27. phoonnx/thirdparty/ko_tables/double_coda.csv +17 -0
  28. phoonnx/thirdparty/ko_tables/hanja.tsv +8525 -0
  29. phoonnx/thirdparty/ko_tables/ipa.csv +22 -0
  30. phoonnx/thirdparty/ko_tables/neutralization.csv +11 -0
  31. phoonnx/thirdparty/ko_tables/tensification.csv +56 -0
  32. phoonnx/thirdparty/ko_tables/yale.csv +22 -0
  33. phoonnx/thirdparty/kog2p/__init__.py +385 -0
  34. phoonnx/thirdparty/kog2p/rulebook.txt +212 -0
  35. phoonnx/thirdparty/mantoq/__init__.py +67 -0
  36. phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
  37. phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +569 -0
  38. phoonnx/thirdparty/mantoq/buck/symbols.py +64 -0
  39. phoonnx/thirdparty/mantoq/buck/tokenization.py +105 -0
  40. phoonnx/thirdparty/mantoq/num2words.py +37 -0
  41. phoonnx/thirdparty/mantoq/pyarabic/__init__.py +12 -0
  42. phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +64 -0
  43. phoonnx/thirdparty/mantoq/pyarabic/araby.py +1647 -0
  44. phoonnx/thirdparty/mantoq/pyarabic/named_const.py +227 -0
  45. phoonnx/thirdparty/mantoq/pyarabic/normalize.py +161 -0
  46. phoonnx/thirdparty/mantoq/pyarabic/number.py +826 -0
  47. phoonnx/thirdparty/mantoq/pyarabic/number_const.py +1704 -0
  48. phoonnx/thirdparty/mantoq/pyarabic/stack.py +52 -0
  49. phoonnx/thirdparty/mantoq/pyarabic/trans.py +517 -0
  50. phoonnx/thirdparty/mantoq/unicode_symbol2label.py +4173 -0
  51. phoonnx/thirdparty/tashkeel/LICENSE +22 -0
  52. phoonnx/thirdparty/tashkeel/SOURCE +1 -0
  53. phoonnx/thirdparty/tashkeel/__init__.py +212 -0
  54. phoonnx/thirdparty/tashkeel/hint_id_map.json +18 -0
  55. phoonnx/thirdparty/tashkeel/input_id_map.json +56 -0
  56. phoonnx/thirdparty/tashkeel/model.onnx +0 -0
  57. phoonnx/thirdparty/tashkeel/target_id_map.json +17 -0
  58. phoonnx/thirdparty/zh_num.py +238 -0
  59. phoonnx/util.py +705 -0
  60. phoonnx/version.py +6 -0
  61. phoonnx/voice.py +521 -0
  62. phoonnx-0.0.0.dist-info/METADATA +255 -0
  63. phoonnx-0.0.0.dist-info/RECORD +86 -0
  64. phoonnx-0.0.0.dist-info/WHEEL +5 -0
  65. phoonnx-0.0.0.dist-info/top_level.txt +2 -0
  66. phoonnx_train/__main__.py +151 -0
  67. phoonnx_train/export_onnx.py +109 -0
  68. phoonnx_train/norm_audio/__init__.py +92 -0
  69. phoonnx_train/norm_audio/trim.py +54 -0
  70. phoonnx_train/norm_audio/vad.py +54 -0
  71. phoonnx_train/preprocess.py +420 -0
  72. phoonnx_train/vits/__init__.py +0 -0
  73. phoonnx_train/vits/attentions.py +427 -0
  74. phoonnx_train/vits/commons.py +147 -0
  75. phoonnx_train/vits/config.py +330 -0
  76. phoonnx_train/vits/dataset.py +214 -0
  77. phoonnx_train/vits/lightning.py +352 -0
  78. phoonnx_train/vits/losses.py +58 -0
  79. phoonnx_train/vits/mel_processing.py +139 -0
  80. phoonnx_train/vits/models.py +732 -0
  81. phoonnx_train/vits/modules.py +527 -0
  82. phoonnx_train/vits/monotonic_align/__init__.py +20 -0
  83. phoonnx_train/vits/monotonic_align/setup.py +13 -0
  84. phoonnx_train/vits/transforms.py +212 -0
  85. phoonnx_train/vits/utils.py +16 -0
  86. phoonnx_train/vits/wavfile.py +860 -0
@@ -0,0 +1,22 @@
1
+ C,CSymbol,V,VSymbol
2
+ ㅂ,p,ㅏ,ä
3
+ ㄷ,t,ㅔ,e
4
+ ㅌ,tʰ,ㅐ,ɛ
5
+ ㅈ,tɕ,ㅣ,i
6
+ ㅉ,tɕ*,ㅗ,o
7
+ ㅊ,tɕʰ,ㅚ,wɛ
8
+ ㄱ,k,ㅜ,u
9
+ ㅎ,h,ㅓ,ʌ̹
10
+ ㄲ,k*,ㅡ,ɯ
11
+ ㅋ,kʰ,ㅢ,ɰi
12
+ ㄹ,l,ㅛ,jo
13
+ ㅁ,m,ㅠ,ju
14
+ ㄴ,n,ㅑ,ja
15
+ ㅇ,ŋ,ㅕ,jʌ̹
16
+ ㄸ,t*,ㅖ,je
17
+ ㅃ,p*,ㅒ,jɛ
18
+ ㅍ,pʰ,ㅘ,wa
19
+ ㅅ,s,ㅝ,wʌ̹
20
+ ㅆ,s*,ㅟ,wi
21
+ #,#,ㅙ,wɛ
22
+ $,$,ㅞ,we
@@ -0,0 +1,11 @@
1
+ _from,_to
2
+ ㄲ,ㄱ
3
+ ㅋ,ㄱ
4
+ ㅅ,ㄷ
5
+ ㅆ,ㄷ
6
+ ㅈ,ㄷ
7
+ ㅊ,ㄷ
8
+ ㅌ,ㄷ
9
+ ㅎ,ㄷ
10
+ ㅂ,ㅂ
11
+ ㅍ,ㅂ
@@ -0,0 +1,56 @@
1
+ _from,_to
2
+ ㄱㄱ,ㄱㄲ
3
+ ㄲㄱ,ㄲㄲ
4
+ ㅋㄱ,ㅋㄲ
5
+ ㄱㄷ,ㄱㄸ
6
+ ㄲㄷ,ㄲㄸ
7
+ ㅋㄷ,ㅋㄸ
8
+ ㄱㅂ,ㄱㅃ
9
+ ㄲㅂ,ㄲㅃ
10
+ ㅋㅂ,ㅋㅃ
11
+ ㄱㅅ,ㄱㅆ
12
+ ㄲㅅ,ㄲㅆ
13
+ ㅋㅅ,ㅋㅆ
14
+ ㄱㅈ,ㄱㅉ
15
+ ㄲㅈ,ㄲㅉ
16
+ ㅋㅈ,ㅋㅉ
17
+ ㄷㄱ,ㄷㄲ
18
+ ㅅㄱ,ㅅㄲ
19
+ ㅆㄱ,ㅆㄲ
20
+ ㅈㄱ,ㅈㄲ
21
+ ㅊㄱ,ㅊㄲ
22
+ ㅌㄱ,ㅌㄲ
23
+ ㄷㄷ,ㄷㄸ
24
+ ㅅㄷ,ㅅㄸ
25
+ ㅆㄷ,ㅆㄸ
26
+ ㅈㄷ,ㅈㄸ
27
+ ㅊㄷ,ㅊㄸ
28
+ ㅌㄷ,ㅌㄸ
29
+ ㄷㅂ,ㄷㅃ
30
+ ㅅㅂ,ㅅㅃ
31
+ ㅆㅂ,ㅆㅃ
32
+ ㅈㅂ,ㅈㅃ
33
+ ㅊㅂ,ㅊㅃ
34
+ ㅌㅂ,ㅌㅃ
35
+ ㄷㅅ,ㄷㅆ
36
+ ㅅㅅ,ㅅㅆ
37
+ ㅆㅅ,ㅆㅆ
38
+ ㅈㅅ,ㅈㅆ
39
+ ㅊㅅ,ㅊㅆ
40
+ ㅌㅅ,ㅌㅆ
41
+ ㄷㅈ,ㄷㅉ
42
+ ㅅㅈ,ㅅㅉ
43
+ ㅆㅈ,ㅆㅉ
44
+ ㅈㅈ,ㅈㅉ
45
+ ㅊㅈ,ㅊㅉ
46
+ ㅌㅈ,ㅌㅉ
47
+ ㅂㄱ,ㅂㄲ
48
+ ㅍㄱ,ㅍㄲ
49
+ ㅂㄷ,ㅂㄸ
50
+ ㅍㄷ,ㅍㄸ
51
+ ㅂㅂ,ㅂㅃ
52
+ ㅍㅂ,ㅍㅃ
53
+ ㅂㅅ,ㅂㅆ
54
+ ㅍㅅ,ㅍㅆ
55
+ ㅂㅈ,ㅂㅉ
56
+ ㅍㅈ,ㅍㅉ
@@ -0,0 +1,22 @@
1
+ C,CSymbol,V,VSymbol
2
+ ㅂ,p,ㅏ,a
3
+ ㄷ,t,ㅔ,ey
4
+ ㅌ,th,ㅐ,ay
5
+ ㅈ,c,ㅣ,i
6
+ ㅉ,cc,ㅗ,o
7
+ ㅊ,ch,ㅚ,oy
8
+ ㄱ,k,ㅜ,wu
9
+ ㅎ,h,ㅓ,e
10
+ ㄲ,kk,ㅡ,u
11
+ ㅋ,kh,ㅢ,uy
12
+ ㄹ,l,ㅛ,yo
13
+ ㅁ,m,ㅠ,yu
14
+ ㄴ,n,ㅑ,ya
15
+ ㅇ,ng,ㅕ,ye
16
+ ㄸ,tt,ㅖ,yey
17
+ ㅃ,pp,ㅒ,yay
18
+ ㅍ,ph,ㅘ,wa
19
+ ㅅ,s,ㅝ,we
20
+ ㅆ,ss,ㅟ,wi
21
+ #,#,ㅙ,way
22
+ $,$,ㅞ,wey
@@ -0,0 +1,385 @@
1
+ # taken from https://github.com/scarletcho/KoG2P
2
+ '''
3
+ g2p.py
4
+ ~~~~~~~~~~
5
+
6
+ This script converts Korean graphemes to romanized phones and then to pronunciation.
7
+
8
+ (1) graph2phone: convert Korean graphemes to romanized phones
9
+ (2) phone2prono: convert romanized phones to pronunciation
10
+ (3) graph2phone: convert Korean graphemes to pronunciation
11
+
12
+ Usage: $ python g2p.py '스물 여덟째 사람'
13
+ (NB. Please check 'rulebook_path' before usage.)
14
+
15
+ Yejin Cho (ycho@utexas.edu)
16
+ Jaegu Kang (jaekoo.jk@gmail.com)
17
+ Hyungwon Yang (hyung8758@gmail.com)
18
+ Yeonjung Hong (yvonne.yj.hong@gmail.com)
19
+
20
+ Created: 2016-08-11
21
+ Last updated: 2019-01-31 Yejin Cho
22
+
23
+ * Key updates made:
24
+ - Executable in both Python 2 and 3.
25
+ - G2P Performance test available ($ python g2p.py test)
26
+ - G2P verbosity control available
27
+
28
+ '''
29
+
30
+ import datetime as dt
31
+ import os.path
32
+ import re
33
+ import math
34
+ import sys
35
+ import optparse
36
+
37
+ # Option
38
+ parser = optparse.OptionParser()
39
+ parser.add_option("-v", action="store_true", dest="verbose", default="False",
40
+ help="This option prints the detail information of g2p process.")
41
+
42
+ (options, args) = parser.parse_args()
43
+ verbose = options.verbose
44
+
45
# Record the interpreter version so the legacy Python-2 branches elsewhere
# in this module (readRules, testG2P, runKoG2P) can pick the right code path.
ver_info = sys.version_info

# Python 2 only: force UTF-8 as the default string encoding so the Hangul
# literals below survive implicit str/unicode conversions.  `reload(sys)`
# restores sys.setdefaultencoding, which site.py normally deletes.  This
# branch is never taken on Python 3 (`reload` is not a builtin there).
if ver_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf-8')
51
+
52
+
53
def readfileUTF8(fname):
    """Read a UTF-8 text file and return its non-empty lines as a list.

    BUG FIX: the original called ``line.encode("utf-8")`` (a Python-2 idiom);
    on Python 3 that produces ``bytes`` and the subsequent ``re.sub`` with a
    ``str`` pattern raises TypeError.  Opening the file as UTF-8 text and
    stripping the trailing newline yields the originally intended result,
    and the context manager guarantees the handle is closed.
    """
    corpus = []
    with open(fname, 'r', encoding='utf-8') as f:
        for raw in f:
            line = raw.replace('\n', '')
            if line != '':
                corpus.append(line)
    return corpus
67
+
68
+
69
def writefile(body, fname):
    """Write every element of *body* to *fname*, one item per line."""
    with open(fname, 'w') as sink:
        for entry in body:
            sink.write('{}\n'.format(entry))
74
+
75
+
76
def readRules(pver, rule_book):
    """Load a KoG2P rulebook.

    Each non-comment line is "<pattern>\t<replacement>"; an empty second
    field denotes a deletion rule.  *pver* selects the Python-2 or
    Python-3 file-reading path.  Returns (rule_in, rule_out) lists.
    """
    if pver == 2:
        handle = open(rule_book, 'r')
    elif pver == 3:
        handle = open(rule_book, 'r', encoding="utf-8")

    patterns = []
    replacements = []

    while True:
        raw = handle.readline()
        if pver == 2:
            raw = unicode(raw.encode("utf-8"))
            raw = re.sub(u'\n', u'', raw)
        elif pver == 3:
            raw = re.sub('\n', '', raw)

        if raw != u'':
            # Lines starting with '#' are comments.
            if raw[0] != u'#':
                fields = raw.split('\t')
                patterns.append(fields[0])
                # Empty output field = deletion rule.
                replacements.append(fields[1] if fields[1] else u'')
        if not raw:
            break
    handle.close()

    return patterns, replacements
105
+
106
+
107
def isHangul(charint):
    """Return True if code point *charint* is a precomposed Hangul
    syllable (U+AC00..U+D7A3)."""
    hangul_init = 44032  # U+AC00, first Hangul syllable
    hangul_fin = 55203   # U+D7A3, last Hangul syllable
    return hangul_init <= charint <= hangul_fin


def checkCharType(var_list):
    """Classify each code point in *var_list*.

    Returns a parallel list with, per element:
      1  -> ASCII space
      0  -> Hangul syllable
     -1  -> anything else
    """
    checked = []
    for code in var_list:
        if code == 32:           # whitespace
            checked.append(1)
        elif isHangul(code):     # Hangul syllable
            checked.append(0)
        else:                    # non-Hangul character
            checked.append(-1)
    return checked


def graph2phone(graphs):
    """Convert Korean graphemes to romanized phones.

    Each Hangul syllable is arithmetically decomposed into onset/nucleus/coda
    indices (21 vowels x 28 codas = 588 codes per onset).  Syllables are
    joined with '-' and word boundaries become '#'.

    FIX: the trailing cleanup regexes used non-raw strings containing
    invalid escape sequences ('\\-' etc.), which raise SyntaxWarning on
    modern Python; they are now raw strings with identical semantics.
    """
    # Accept bytes for backwards compatibility with Python-2 callers;
    # Python 3 str has no .decode, hence the AttributeError guard.
    try:
        graphs = graphs.decode('utf8')
    except AttributeError:
        pass

    integers = [ord(ch) for ch in graphs]

    # Romanization (according to Korean Spontaneous Speech corpus; 성인자유발화코퍼스)
    phones = ''
    ONS = ['k0', 'kk', 'nn', 't0', 'tt', 'rr', 'mm', 'p0', 'pp',
           's0', 'ss', 'oh', 'c0', 'cc', 'ch', 'kh', 'th', 'ph', 'h0']
    NUC = ['aa', 'qq', 'ya', 'yq', 'vv', 'ee', 'yv', 'ye', 'oo', 'wa',
           'wq', 'wo', 'yo', 'uu', 'wv', 'we', 'wi', 'yu', 'xx', 'xi', 'ii']
    COD = ['', 'kf', 'kk', 'ks', 'nf', 'nc', 'nh', 'tf',
           'll', 'lk', 'lm', 'lb', 'ls', 'lt', 'lp', 'lh',
           'mf', 'pf', 'ps', 's0', 'ss', 'oh', 'c0', 'ch',
           'kh', 'th', 'ph', 'h0']

    idx = checkCharType(integers)
    iElement = 0
    while iElement < len(integers):
        if idx[iElement] == 0:  # Hangul syllable: decompose
            base = 44032  # U+AC00
            df = int(integers[iElement]) - base
            iONS = int(math.floor(df / 588)) + 1           # 588 = 21 * 28
            iNUC = int(math.floor((df % 588) / 28)) + 1
            iCOD = int((df % 588) % 28) + 1

            s1 = '-' + ONS[iONS - 1]                       # onset
            s2 = NUC[iNUC - 1]                             # nucleus
            s3 = COD[iCOD - 1] if COD[iCOD - 1] else ''    # coda (may be empty)
            phones = phones + s1 + s2 + s3

        elif idx[iElement] == 1:  # space -> word boundary
            phones = phones + '#'

        # 'oh' (ㅇ) in onset position is silent: drop it right away.
        phones = re.sub('-(oh)', '-', phones)
        iElement += 1

    # Delete any remaining syllable-initial ieung (null onset).
    phones = re.sub('^oh', '', phones)
    phones = re.sub('-(oh)', '', phones)

    # Coda ieung is the velar nasal 'ng'.
    # NOTE(review): the captured boundary char ([# ]) is dropped here
    # ('oh#' -> 'ng'); upstream KoG2P behaves the same, so it is preserved.
    phones = re.sub('oh-', 'ng-', phones)
    phones = re.sub('oh([# ]|$)', 'ng', phones)

    # Strip stray delimiters: hyphen after a non-word char, trailing
    # non-word chars, and a leading hyphen.
    phones = re.sub(r'(\W+)\-', r'\1', phones)
    phones = re.sub(r'\W+$', '', phones)
    phones = re.sub(r'^\-', '', phones)
    return phones
192
+
193
+
194
def phone2prono(phones, rule_in, rule_out):
    """Apply every G2P rewrite rule (regex pattern -> replacement), in
    rulebook order, to the romanized phone string."""
    for pattern, replacement in zip(rule_in, rule_out):
        phones = re.sub(pattern, replacement, phones)
    return phones
201
+
202
+
203
def addPhoneBoundary(phones):
    """Insert a comma after each two-letter phone symbol to mark phone
    boundaries; '-' and '#' delimiters are copied through, spaces dropped."""
    out = ''
    pos = 0
    while pos + 2 <= len(phones):
        ch = phones[pos]
        if ch == u'-':        # syllable delimiter: keep, then read the phone
            out += ch
            pos += 1
        elif ch == u' ':      # spaces are discarded
            pos += 1
        elif ch == u'#':      # word boundary: keep, then read the phone
            out += ch
            pos += 1

        out = out + phones[pos] + phones[pos + 1] + u','
        pos += 2

    return out
221
+
222
+
223
def addSpace(phones):
    """Separate consecutive two-letter phone symbols with single spaces."""
    pairs = []
    pos = 0
    while pos < len(phones):
        pairs.append(phones[pos] + phones[pos + 1])
        pos += 2
    return ' '.join(pairs)
234
+
235
+
236
def graph2prono(graphs, rule_in, rule_out):
    """Convert a grapheme string to its pronunciation by applying the
    rulebook repeatedly until the output stops changing (fixed point,
    compared with syllable hyphens ignored)."""
    romanized = graph2phone(graphs)
    romanized_bd = addPhoneBoundary(romanized)

    # First rule pass, then normalize delimiters: commas -> spaces,
    # strip a trailing space, '#' -> '-', collapse hyphen runs.
    prono = phone2prono(romanized_bd, rule_in, rule_out)
    prono = re.sub(u',', u' ', prono)
    prono = re.sub(u' $', u'', prono)
    prono = re.sub(u'#', u'-', prono)
    prono = re.sub(u'-+', u'-', prono)

    if verbose == True:
        print('=> Romanized: ' + romanized)
        print('=> Romanized with boundaries: ' + romanized_bd)
        print('=> Initial output: ' + prono)

    previous = prono
    loop_cnt = 1
    while True:
        # Re-insert commas (the rule patterns expect them), re-apply rules,
        # then normalize back to space-separated form.
        candidate = phone2prono(re.sub(u' ', u',', previous + u','), rule_in, rule_out)
        candidate = re.sub(u',', u' ', candidate)
        candidate = re.sub(u' $', u'', candidate)

        if re.sub(u'-', u'', previous) == re.sub(u'-', u'', candidate):
            # Fixed point reached: return without syllable hyphens.
            final = re.sub(u'-', u'', candidate)
            if verbose == True:
                print('\n=> Exhaustive rule application completed!')
                print('=> Total loop count: ' + str(loop_cnt))
                print('=> Output: ' + final)
            return final

        if verbose == True:
            print('\n=> Rule applied for more than once')
            print('cmp1: ' + re.sub(u'-', u'', previous))
            print('cmp2: ' + re.sub(u'-', u'', candidate))
        loop_cnt += 1
        previous = candidate
276
+
277
+
278
def testG2P(rulebook, testset):
    """Run the G2P regression test set in *testset* against *rulebook*,
    print mismatches, and write the passing items to 'good.txt'."""
    [testin, testout] = readRules(ver_info[0], testset)
    errors = 0
    body = []
    for i, item_in in enumerate(testin):
        print('Test item #: ' + str(i + 1) + '/' + str(len(testin)))
        item_out = testout[i]
        # Expected answer: romanize the gold output, drop hyphens, re-space.
        ans = addSpace(re.sub(u'-', u'', graph2phone(item_out)))

        [rule_in, rule_out] = readRules(ver_info[0], rulebook)
        pred = graph2prono(item_in, rule_in, rule_out)

        if pred != ans:
            print('G2P ERROR: [result] ' + pred + '\t\t\t[ans] ' + item_in + ' [' + item_out + '] ' + ans)
            errors += 1
        else:
            body.append('[result] ' + pred + '\t\t\t[ans] ' + item_in + ' [' + item_out + '] ' + ans)

    print('Total error item #: ' + str(errors))
    writefile(body, 'good.txt')
301
+
302
+
303
# Mapping from KoG2P's two-letter romanized phone symbols back to Hangul
# compatibility jamo, used by runKoG2P to assemble a jamo string per word.
# Double codas (e.g. 'ks') map to two jamo characters.
kog2p_to_hangul = {
    # Onset consonants (syllable-initial)
    'p0': 'ㅂ',
    'ph': 'ㅍ',
    'pp': 'ㅃ',
    't0': 'ㄷ',
    'th': 'ㅌ',
    'tt': 'ㄸ',
    'k0': 'ㄱ',
    'kh': 'ㅋ',
    'kk': 'ㄲ',
    's0': 'ㅅ',
    'ss': 'ㅆ',
    'h0': 'ㅎ',
    'c0': 'ㅈ',
    'ch': 'ㅊ',
    'cc': 'ㅉ',
    'mm': 'ㅁ',
    'nn': 'ㄴ',
    'rr': 'ㄹ',

    # Coda consonants (syllable-final; two-jamo entries are double codas)
    'pf': 'ㅂ',
    'tf': 'ㄷ',
    'kf': 'ㄱ',
    'mf': 'ㅁ',
    'nf': 'ㄴ',
    'ng': 'ㅇ',
    'll': 'ㄹ',
    'ks': 'ㄱㅅ',
    'nc': 'ㄴㅈ',
    'nh': 'ㄴㅎ',
    'lk': 'ㄹㄱ',
    'lm': 'ㄹㅁ',
    'lb': 'ㄹㅂ',
    'ls': 'ㄹㅅ',
    'lt': 'ㄹㅌ',
    'lp': 'ㄹㅍ',
    'lh': 'ㄹㅎ',
    'ps': 'ㅂㅅ',

    # Monophthongs (vowels)
    'ii': 'ㅣ',
    'ee': 'ㅔ',
    'qq': 'ㅐ',
    'aa': 'ㅏ',
    'xx': 'ㅡ',
    'vv': 'ㅓ',
    'uu': 'ㅜ',
    'oo': 'ㅗ',

    # Diphthongs (vowels)
    'ye': 'ㅖ',
    'yq': 'ㅒ',
    'ya': 'ㅑ',
    'yv': 'ㅕ',
    'yu': 'ㅠ',
    'yo': 'ㅛ',
    'wi': 'ㅟ',
    'wo': 'ㅚ',
    'wq': 'ㅙ',
    'we': 'ㅞ',
    'wa': 'ㅘ',
    'wv': 'ㅝ',
    'xi': 'ㅢ',
}
369
+
370
+
371
def runKoG2P(graph, rulebook=None):
    """Phonemize the Hangul string *graph* word by word and return the
    result as Hangul jamo, words separated by spaces.

    Falls back to the rulebook shipped next to this module when
    *rulebook* is not given.
    """
    if not rulebook:
        rulebook = f"{os.path.dirname(__file__)}/rulebook.txt"
    [rule_in, rule_out] = readRules(ver_info[0], rulebook)

    converted = []
    for word in graph.split():
        symbols = graph2prono(word, rule_in, rule_out).split()
        converted.append("".join(kog2p_to_hangul[s] for s in symbols))
    return " ".join(converted)
382
+
383
+
384
+
385
+