interscript 0.1.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +76 -128
  21. data/lib/interscript/command.rb +6 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +73 -223
  63. data/README.adoc +0 -297
  64. data/bin/rspec +0 -29
  65. data/lib/g2pwrapper.py +0 -34
  66. data/lib/interscript/mapping.rb +0 -125
  67. data/lib/model-7 +0 -0
  68. data/lib/tha-pt-b-7 +0 -0
  69. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  70. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  71. data/maps/alalc-bel-cyrl-latn-1997.yaml +0 -125
  72. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  73. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  74. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -625
  75. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -628
  76. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -112
  77. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  78. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  79. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  80. data/maps/alalc-mkd-cyrl-latn-1997.yaml +0 -114
  81. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -222
  82. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  83. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  84. data/maps/alalc-srp-cyrl-latn-2013.yaml +0 -135
  85. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  86. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  87. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  88. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -175
  89. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  90. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -294
  91. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  92. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  93. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  94. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  95. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  96. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  97. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  98. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +0 -285
  99. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  100. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  101. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +0 -7456
  102. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -702
  103. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -20
  104. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  105. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  106. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -43
  107. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  108. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  109. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  110. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  111. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  112. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -93
  113. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  114. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  115. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -163
  116. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  117. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  118. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  119. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -685
  120. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -681
  121. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -20
  122. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -32
  123. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -89
  124. data/maps/gki-bel-cyrl-latn-1992.yaml +0 -33
  125. data/maps/gki-bel-cyrl-latn-2000.yaml +0 -201
  126. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +0 -186
  127. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  128. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -141
  129. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -122
  130. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  131. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  132. data/maps/icao-per-Arab-Latn-9303.yaml +0 -104
  133. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -118
  134. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  135. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -120
  136. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -610
  137. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -41
  138. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  139. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -272
  140. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  141. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  142. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  143. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  144. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  145. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  146. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  147. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  148. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -110
  149. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  150. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  151. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  152. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  153. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -88
  154. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  155. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  156. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  157. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -167
  158. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  159. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  160. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  161. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  162. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  163. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  164. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  165. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  166. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  167. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -32
  168. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -20
  169. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  170. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  171. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  172. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
  173. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  174. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  175. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  176. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  177. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -37
  178. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  179. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  180. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  181. data/spec/interscript/mapping_spec.rb +0 -42
  182. data/spec/interscript_spec.rb +0 -26
  183. data/spec/spec_helper.rb +0 -3
@@ -1,901 +0,0 @@
1
- ---
2
- authority_id: kp
3
- id: 2002
4
- language: kor
5
- source_script: Hang
6
- destination_script: Latn
7
- name: Korean Democratic People's Republic of Korea Korean System (2002)
8
- url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/8th-uncsgn-docs/inf/8th_UNCSGN_econf.94_INF.72.pdf
9
- creation_date:
10
- adoption_date:
11
- description:
12
-
13
- notes:
14
-
15
- - Here is a list of features that are listed in the guideline but
16
- not implemented in this map.
17
-
18
- - Note 3.2
19
- The combination n+r is romanized as -ll- only when it is "considered
20
- to be longstanding". In this implementation, all n+r will be romanized as
21
- -ll- for the sake of simplicity.
22
-
23
- - Note 3.3
24
- Sai-siot (Connective ㅅ) is not written out in DPRK Korean, but it is
25
- supposed to be romanized. Sai-siot is not predictable.
26
- This has not been implemented.
27
-
28
- - Note 4.1
29
- Hyphen "may be inserted in case of a possible confusion in pronunciation".
30
- Except for the n-g combination, this has not been implemented.
31
-
32
- - Note 4.4
33
- Geographical names "may be transliterated or translated". In this map,
34
- all names will be transliterated, not translated. Numerals will not be
35
- transliterated.
36
-
37
- - Note 4.5
38
- Spacing rule for personal names has not been implemented.
39
-
40
- - Note 4.7
41
- Optional omission of diacritics and optional simplification of
42
- KK, TT, PP, SS, JJ to single letter have not been implemented.
43
-
44
- tests:
45
- # Note1.5
46
- - source: "우리산"
47
- expected: "Urisan"
48
-
49
- # Note2.1
50
- - source: "교구동"
51
- expected: "Kyogu-dong"
52
- - source: "초도"
53
- expected: "Chodo"
54
- - source: "고비리"
55
- expected: "Kobi-ri"
56
- - source: "강동"
57
- expected: "Kangdong"
58
- - source: "금교"
59
- expected: "Kümgyo"
60
- - source: "칠보산"
61
- expected: "Chilbosan"
62
-
63
- # Note2.2
64
- - source: "곡산"
65
- expected: "Koksan"
66
- - source: "갑산"
67
- expected: "Kapsan"
68
- - source: "앞산"
69
- expected: "Apsan"
70
- - source: "삿갓봉"
71
- expected: "Satkatbong"
72
-
73
- # Note2.3
74
- - source: "울산"
75
- expected: "Ulsan"
76
- # - source: "은률"
77
- # expected: "Ünryul" # This is an exception to note 3.1
78
-
79
- # Note2.4
80
- - source: "닭섬"
81
- expected: "Taksŏm"
82
- - source: "물곬"
83
- expected: "Mulkol"
84
- - source: "붉은바위"
85
- expected: "Pulgünbawi"
86
- - source: "앉은바위"
87
- expected: "Anjünbawi"
88
-
89
- # Note3.1
90
- - source: "백마산"
91
- expected: "Paengmasan"
92
- - source: "꽃마을"
93
- expected: "Kkonmaül"
94
- - source: "압록강"
95
- expected: "Amrokgang"
96
-
97
- # Note3.2
98
- - source: "천리마"
99
- expected: "Chŏllima"
100
- # - source: "한나산" # Typo in the original document
101
- - source: "한라산"
102
- expected: "Hallasan"
103
- - source: "전라도"
104
- expected: "Jŏlla-do"
105
-
106
- # Note3.3
107
-
108
- # - source: "기대산" # ROK: 깃대산
109
- # expected: "Kittaesan"
110
- # - source: "새별읍" # ROK: 샛별
111
- # expected: "Saeppyŏl-üp" # hyphen
112
- # - source: "뒤문" # ROK: 뒷문
113
- # expected: "Twinmun"
114
-
115
- # Note4.1 - Separator (OPTIONAL)
116
-
117
- - source: "앞-언덕"
118
- expected: "Ap-ŏndŏk"
119
- - source: "부억-안골"
120
- expected: "Puŏk-angol"
121
- - source: "판교"
122
- expected: "Phan-gyo"
123
- # - source: "방어동"
124
- # expected: "Pang-ŏ-dong"
125
-
126
- # Note4.2
127
- - source: "평안남도 평성시"
128
- expected: "Phyŏngannam-do Phyŏngsŏng-si"
129
-
130
- # Note4.3
131
- - source: "3.1동"
132
- expected: "3.1-dong"
133
-
134
- # Note4.6
135
- - source: "평양"
136
- expected: "Pyongyang"
137
-
138
- map:
139
- character_separator: ""
140
- word_separator: " "
141
- title_case: True
142
- inherit: "nil-kor-Hang-Hang-jamo"
143
-
144
- rules:
145
-
146
- # This system does not require transliteration of numerals
147
- # convert numbers to space + Hangul
148
- # - pattern: "([^0-9 ])(?=[0-9])"
149
- # result: "\\1 "
150
- # - pattern: "1"
151
- # result: "일"
152
- # - pattern: "2"
153
- # result: "이"
154
- # - pattern: "3"
155
- # result: "삼"
156
- # - pattern: "4"
157
- # result: "사"
158
- # - pattern: "5"
159
- # result: "오"
160
- # - pattern: "6"
161
- # result: "육"
162
- # - pattern: "7"
163
- # result: "칠"
164
- # - pattern: "8"
165
- # result: "팔"
166
- # - pattern: "9"
167
- # result: "구"
168
-
169
- # Use voiced onset for geographical features
170
- # Note 4.3.1
171
- - pattern: "(?<=..)산( |$)"
172
- result: "san\\1"
173
- - pattern: "(?<=..)거리( |$)"
174
- result: "gŏri\\1"
175
- - pattern: "(?<=..)고개( |$)"
176
- result: "gogae\\1"
177
- - pattern: "(?<=..)대( |$)"
178
- result: "dae\\1"
179
- - pattern: "(?<=..)봉( |$)"
180
- result: "bong\\1"
181
- - pattern: "(?<=..)교( |$)"
182
- result: "gyo\\1"
183
- - pattern: "(?<=..)골( |$)"
184
- result: "gol\\1"
185
- - pattern: "(?<=..)각( |$)"
186
- result: "gak\\1"
187
- - pattern: "(?<=..)벌( |$)"
188
- result: "bŏl\\1"
189
- - pattern: "(?<=..)관( |$)"
190
- result: "gwan\\1"
191
- - pattern: "(?<=..)곶( |$)"
192
- result: "got\\1"
193
- - pattern: "(?<=..)강( |$)"
194
- result: "gang\\1"
195
-
196
- # add hyphen in front of generics
197
- # Only add hyphen if the name is three syllables or longer
198
- - pattern: "(?<=..)도( |$)"
199
- result: "-do\\1"
200
- - pattern: "(?<=..)시( |$)"
201
- result: "-si\\1"
202
- - pattern: "(?<=..)군( |$)"
203
- result: "-gun\\1"
204
- - pattern: "(?<=..)면( |$)"
205
- result: "-myŏn\\1"
206
- - pattern: "(?<=..)리( |$)"
207
- result: "-ri\\1"
208
- - pattern: "(?<=..)동( |$)"
209
- result: "-dong\\1"
210
- - pattern: "(?<=..)구( |$)"
211
- result: "-gu\\1"
212
- - pattern: "(?<=..)구역( |$)"
213
- result: "-guyŏk\\1"
214
-
215
- # The name Pyongyang will be an exception
216
- # Not Phyŏngyang
217
-
218
- - pattern: "평양"
219
- result: "Pyongyang"
220
-
221
- postrules:
222
-
223
- # Add space to the two ends of the string for easier word boundary handling
224
- - pattern: "^"
225
- result: " "
226
- - pattern: "$"
227
- result: " "
228
-
229
- # HANGUL JONGSEONG SSANGKIYEOK
230
- - pattern: "ᆩᄋ"
231
- result: "ᆨᄁ"
232
- - pattern: "ᆩ"
233
- result: "ᆨ"
234
-
235
- # HANGUL JONGSEONG KIYEOK-SIOS
236
- - pattern: "ᆪᄋ"
237
- result: "ᆨᄉ"
238
- - pattern: "ᆪ"
239
- result: "ᆨ"
240
-
241
- # HANGUL JONGSEONG NIEUN-CIEUC
242
- - pattern: "ᆬᄋ"
243
- result: "ᆫᄌ"
244
- - pattern: "ᆬ"
245
- result: "ᆫ"
246
-
247
- # HANGUL JONGSEONG NIEUN-HIEUH
248
- - pattern: "ᆭᄀ"
249
- result: "ᆫᄏ"
250
- - pattern: "ᆭᄃ"
251
- result: "ᆫᄐ"
252
- - pattern: "ᆭᄇ"
253
- result: "ᆫᄑ"
254
- - pattern: "ᆭᄌ"
255
- result: "ᆫᄎ"
256
- - pattern: "ᆭ"
257
- result: "ᆫ"
258
-
259
- # HANGUL JONGSEONG TIEUT
260
- - pattern: "ᆮ(?=[ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄌᄍᄎᄏᄐᄑᄒ])"
261
- result: "ᆺ"
262
-
263
- # HANGUL JONGSEONG RIEUL-SIOS
264
- - pattern: "ᆳᄋ"
265
- result: "ᆯᄉ"
266
- - pattern: "ᆳ"
267
- result: "ᆯ"
268
-
269
- # HANGUL JONGSEONG RIEUL-THIEUTH
270
- - pattern: "ᆴᄋ"
271
- result: "ᆯᄐ"
272
- - pattern: "ᆴ"
273
- result: "ᆯ"
274
-
275
- # HANGUL JONGSEONG RIEUL-PHIEUPH
276
- - pattern: "ᆵᄋ"
277
- result: "ᆯᄑ"
278
- - pattern: "ᆵ(?=[ᄃᄄᄐ])"
279
- result: "ᆯ"
280
- - pattern: "ᆵ"
281
- result: "ᄇ"
282
-
283
- # HANGUL JONGSEONG RIEUL-HIEUH
284
- - pattern: "ᆶᄀ"
285
- result: "ᆯᄏ"
286
- - pattern: "ᆶᄃ"
287
- result: "ᆯᄐ"
288
- - pattern: "ᆶᄇ"
289
- result: "ᆯᄑ"
290
- - pattern: "ᆶᄌ"
291
- result: "ᆯᄎ"
292
- - pattern: "ᆶ"
293
- result: "ᆯ"
294
-
295
- # HANGUL JONGSEONG PIEUP-SIOS
296
- - pattern: "ᆹᄋ"
297
- result: "ᄇᄉ"
298
- - pattern: "ᆹ"
299
- result: "ᄇ"
300
-
301
- # HANGUL JONGSEONG SSANG-SIOS
302
- - pattern: "ᆻᄋ"
303
- result: "ᆺᄊ"
304
- - pattern: "ᆻ"
305
- result: "ᆺ"
306
-
307
- # HANGUL JONGSEONG CIEUC
308
- - pattern: "ᆽᄋ"
309
- result: "ᆺᄌ"
310
- - pattern: "ᆽ"
311
- result: "ᆺ"
312
-
313
- # HANGUL JONGSEONG CHIEUCH
314
- - pattern: "ᆾᄋ"
315
- result: "ᆺᄎ"
316
- - pattern: "ᆾ"
317
- result: "ᆺ"
318
-
319
- # HANGUL JONGSEONG KHIEUKH
320
- - pattern: "ᆿᄋ"
321
- result: "ᆨᄏ"
322
- - pattern: "ᆿ"
323
- result: "ᆨ"
324
-
325
- # HANGUL JONGSEONG THIEUTH
326
- - pattern: "ᇀᄋ"
327
- result: "ᆺᄐ"
328
- - pattern: "ᇀ"
329
- result: "ᆺ"
330
-
331
- # HANGUL JONGSEONG PHIEUPH
332
- - pattern: "ᇁᄋ"
333
- result: "ᆸᄑ"
334
- - pattern: "ᇁ"
335
- result: "ᆸ"
336
-
337
- # HANGUL JONGSEONG HIEUH
338
- - pattern: "ᇂᄀ"
339
- result: "ᄏ"
340
- - pattern: "ᇂᄃ"
341
- result: "ᄐ"
342
- - pattern: "ᇂᄇ"
343
- result: "ᄑ"
344
- - pattern: "ᇂᄌ"
345
- result: "ᄎ"
346
- - pattern: "ᇂ"
347
- result: ""
348
-
349
- # From Unicode Chart
350
- # https://github.com/unicode-org/cldr/blob/master/common/transforms/Korean-Latin-BGN.xml
351
- - pattern: "ᆨᄀ"
352
- result: "kk" # HANGUL JONGSEONG KIYEOK + CHOSEONG KIYEOK
353
- - pattern: "ᆨᄂ"
354
- result: "ngn" # HANGUL JONGSEONG KIYEOK + CHOSEONG NIEUN
355
- - pattern: "ᆨᄃ"
356
- result: "kt" # HANGUL JONGSEONG KIYEOK + CHOSEONG TIEUT
357
- - pattern: "ᆨᄅ"
358
- result: "ngn" # HANGUL JONGSEONG KIYEOK + CHOSEONG RIEUL
359
- - pattern: "ᆨᄆ"
360
- result: "ngm" # HANGUL JONGSEONG KIYEOK + CHOSEONG MIEUM
361
- - pattern: "ᆨᄇ"
362
- result: "kp" # HANGUL JONGSEONG KIYEOK + CHOSEONG PIEUP
363
- - pattern: "ᆨᄉ"
364
- result: "ks" # HANGUL JONGSEONG KIYEOK + CHOSEONG SIOS
365
- - pattern: "ᆨᄋ"
366
- result: "g" # HANGUL JONGSEONG KIYEOK + CHOSEONG IEUNG
367
- - pattern: "ᆨᄌ"
368
- result: "kj" # HANGUL JONGSEONG KIYEOK + CHOSEONG CIEUC
369
- - pattern: "ᆨᄎ"
370
- result: "kch" # HANGUL JONGSEONG KIYEOK + CHOSEONG CHIEUCH
371
- - pattern: "ᆨᄏ"
372
- result: "kkh" # HANGUL JONGSEONG KIYEOK + CHOSEONG KHIEUKH # NOTE: the dash is always skipped
373
- - pattern: "ᆨᄐ"
374
- result: "kth" # HANGUL JONGSEONG KIYEOK + CHOSEONG THIEUTH
375
- - pattern: "ᆨᄑ"
376
- result: "kp" # HANGUL JONGSEONG KIYEOK + CHOSEONG PHIEUPH
377
- - pattern: "ᆨᄒ"
378
- result: "kh" # HANGUL JONGSEONG KIYEOK + CHOSEONG HIEUH
379
- - pattern: "ᆨᄁ"
380
- result: "kkk" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGKIYEOK
381
- - pattern: "ᆨᄄ"
382
- result: "ktt" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGTIEUT
383
- - pattern: "ᆨᄈ"
384
- result: "kpp" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGPIEUP
385
- - pattern: "ᆨᄊ"
386
- result: "kss" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGSIOS
387
- - pattern: "ᆨᄍ"
388
- result: "kjj" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGCIEUC
389
- - pattern: "ᆫᄀ"
390
- result: "n-g" # HANGUL JONGSEONG NIEUN + CHOSEONG KIYEOK
391
- - pattern: "ᆫᄂ"
392
- result: "nn" # HANGUL JONGSEONG NIEUN + CHOSEONG NIEUN
393
- - pattern: "ᆫᄃ"
394
- result: "nd" # HANGUL JONGSEONG NIEUN + CHOSEONG TIEUT
395
- - pattern: "ᆫᄅ"
396
- result: "ll" # HANGUL JONGSEONG NIEUN + CHOSEONG RIEUL
397
- - pattern: "ᆫᄆ"
398
- result: "nm" # HANGUL JONGSEONG NIEUN + CHOSEONG MIEUM
399
- - pattern: "ᆫᄇ"
400
- result: "nb" # HANGUL JONGSEONG NIEUN + CHOSEONG PIEUP
401
- - pattern: "ᆫᄉ"
402
- result: "ns" # HANGUL JONGSEONG NIEUN + CHOSEONG SIOS
403
- - pattern: "ᆫᄋ"
404
- result: "n" # HANGUL JONGSEONG NIEUN + CHOSEONG IEUNG
405
- - pattern: "ᆫᄌ"
406
- result: "nj" # HANGUL JONGSEONG NIEUN + CHOSEONG CIEUC
407
- - pattern: "ᆫᄎ"
408
- result: "nch" # HANGUL JONGSEONG NIEUN + CHOSEONG CHIEUCH
409
- - pattern: "ᆫᄏ"
410
- result: "nkh" # HANGUL JONGSEONG NIEUN + CHOSEONG KHIEUKH
411
- - pattern: "ᆫᄐ"
412
- result: "nth" # HANGUL JONGSEONG NIEUN + CHOSEONG THIEUTH
413
- - pattern: "ᆫᄑ"
414
- result: "nph" # HANGUL JONGSEONG NIEUN + CHOSEONG PHIEUPH
415
- - pattern: "ᆫᄒ"
416
- result: "nh" # HANGUL JONGSEONG NIEUN + CHOSEONG HIEUH
417
- - pattern: "ᆫᄁ"
418
- result: "nkk" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGKIYEOK
419
- - pattern: "ᆫᄄ"
420
- result: "ntt" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGTIEUT
421
- - pattern: "ᆫᄈ"
422
- result: "npp" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGPIEUP
423
- - pattern: "ᆫᄊ"
424
- result: "nss" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGSIOS
425
- - pattern: "ᆫᄍ"
426
- result: "njj" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGCIEUC
427
- - pattern: "ᆯᄀ"
428
- result: "lk" # HANGUL JONGSEONG RIEUL + CHOSEONG KIYEOK
429
- - pattern: "ᆯᄂ"
430
- result: "ll" # HANGUL JONGSEONG RIEUL + CHOSEONG NIEUN
431
- - pattern: "ᆯᄃ"
432
- result: "lt" # HANGUL JONGSEONG RIEUL + CHOSEONG TIEUT
433
- - pattern: "ᆯᄅ"
434
- result: "ll" # HANGUL JONGSEONG RIEUL + CHOSEONG RIEUL
435
- - pattern: "ᆯᄆ"
436
- result: "lm" # HANGUL JONGSEONG RIEUL + CHOSEONG MIEUM
437
- - pattern: "ᆯᄇ"
438
- result: "lb" # HANGUL JONGSEONG RIEUL + CHOSEONG PIEUP
439
- - pattern: "ᆯᄉ"
440
- result: "ls" # HANGUL JONGSEONG RIEUL + CHOSEONG SIOS
441
- - pattern: "ᆯᄋ"
442
- result: "r" # HANGUL JONGSEONG RIEUL + CHOSEONG IEUNG
443
- - pattern: "ᆯᄌ"
444
- result: "lj" # HANGUL JONGSEONG RIEUL + CHOSEONG CIEUC
445
- - pattern: "ᆯᄎ"
446
- result: "lch" # HANGUL JONGSEONG RIEUL + CHOSEONG CHIEUCH
447
- - pattern: "ᆯᄏ"
448
- result: "lkh" # HANGUL JONGSEONG RIEUL + CHOSEONG KHIEUKH
449
- - pattern: "ᆯᄐ"
450
- result: "lth" # HANGUL JONGSEONG RIEUL + CHOSEONG THIEUTH
451
- - pattern: "ᆯᄑ"
452
- result: "lph" # HANGUL JONGSEONG RIEUL + CHOSEONG PHIEUPH
453
- - pattern: "ᆯᄒ"
454
- result: "lh" # HANGUL JONGSEONG RIEUL + CHOSEONG HIEUH
455
- - pattern: "ᆯᄁ"
456
- result: "lkk" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGKIYEOK
457
- - pattern: "ᆯᄄ"
458
- result: "ltt" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGTIEUT
459
- - pattern: "ᆯᄈ"
460
- result: "lpp" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGPIEUP
461
- - pattern: "ᆯᄊ"
462
- result: "lss" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGSIOS
463
- - pattern: "ᆯᄍ"
464
- result: "ljj" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGCIEUC
465
- - pattern: "ᆷᄀ"
466
- result: "mg" # HANGUL JONGSEONG MIEUM + CHOSEONG KIYEOK
467
- - pattern: "ᆷᄂ"
468
- result: "mn" # HANGUL JONGSEONG MIEUM + CHOSEONG NIEUN
469
- - pattern: "ᆷᄃ"
470
- result: "md" # HANGUL JONGSEONG MIEUM + CHOSEONG TIEUT
471
- - pattern: "ᆷᄅ"
472
- result: "mr" # HANGUL JONGSEONG MIEUM + CHOSEONG RIEUL # Note 3.1
473
- - pattern: "ᆷᄆ"
474
- result: "mm" # HANGUL JONGSEONG MIEUM + CHOSEONG MIEUM
475
- - pattern: "ᆷᄇ"
476
- result: "mb" # HANGUL JONGSEONG MIEUM + CHOSEONG PIEUP
477
- - pattern: "ᆷᄉ"
478
- result: "ms" # HANGUL JONGSEONG MIEUM + CHOSEONG SIOS
479
- - pattern: "ᆷᄋ"
480
- result: "m" # HANGUL JONGSEONG MIEUM + CHOSEONG IEUNG
481
- - pattern: "ᆷᄌ"
482
- result: "mj" # HANGUL JONGSEONG MIEUM + CHOSEONG CIEUC
483
- - pattern: "ᆷᄎ"
484
- result: "mch" # HANGUL JONGSEONG MIEUM + CHOSEONG CHIEUCH
485
- - pattern: "ᆷᄏ"
486
- result: "mkh" # HANGUL JONGSEONG MIEUM + CHOSEONG KHIEUKH
487
- - pattern: "ᆷᄐ"
488
- result: "mth" # HANGUL JONGSEONG MIEUM + CHOSEONG THIEUTH
489
- - pattern: "ᆷᄑ"
490
- result: "mph" # HANGUL JONGSEONG MIEUM + CHOSEONG PHIEUPH
491
- - pattern: "ᆷᄒ"
492
- result: "mh" # HANGUL JONGSEONG MIEUM + CHOSEONG HIEUH
493
- - pattern: "ᆷᄁ"
494
- result: "mkk" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGKIYEOK
495
- - pattern: "ᆷᄄ"
496
- result: "mtt" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGTIEUT
497
- - pattern: "ᆷᄈ"
498
- result: "mpp" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGPIEUP
499
- - pattern: "ᆷᄊ"
500
- result: "mss" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGSIOS
501
- - pattern: "ᆷᄍ"
502
- result: "mjj" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGCIEUC
503
- - pattern: "ᆸᄀ"
504
- result: "pk" # HANGUL JONGSEONG PIEUP + CHOSEONG KIYEOK
505
- - pattern: "ᆸᄂ"
506
- result: "mn" # HANGUL JONGSEONG PIEUP + CHOSEONG NIEUN
507
- - pattern: "ᆸᄃ"
508
- result: "pt" # HANGUL JONGSEONG PIEUP + CHOSEONG TIEUT
509
- - pattern: "ᆸᄅ"
510
- result: "mr" # HANGUL JONGSEONG PIEUP + CHOSEONG RIEUL
511
- - pattern: "ᆸᄆ"
512
- result: "mm" # HANGUL JONGSEONG PIEUP + CHOSEONG MIEUM
513
- - pattern: "ᆸᄇ"
514
- result: "pp" # HANGUL JONGSEONG PIEUP + CHOSEONG PIEUP
515
- - pattern: "ᆸᄉ"
516
- result: "ps" # HANGUL JONGSEONG PIEUP + CHOSEONG SIOS
517
- - pattern: "ᆸᄋ"
518
- result: "b" # HANGUL JONGSEONG PIEUP + CHOSEONG IEUNG
519
- - pattern: "ᆸᄌ"
520
- result: "pj" # HANGUL JONGSEONG PIEUP + CHOSEONG CIEUC
521
- - pattern: "ᆸᄎ"
522
- result: "pch" # HANGUL JONGSEONG PIEUP + CHOSEONG CHIEUCH
523
- - pattern: "ᆸᄏ"
524
- result: "pkh" # HANGUL JONGSEONG PIEUP + CHOSEONG KHIEUKH
525
- - pattern: "ᆸᄐ"
526
- result: "pth" # HANGUL JONGSEONG PIEUP + CHOSEONG THIEUTH
527
- - pattern: "ᆸᄑ"
528
- result: "pph" # HANGUL JONGSEONG PIEUP + CHOSEONG PHIEUPH
529
- - pattern: "ᆸᄒ"
530
- result: "ph" # HANGUL JONGSEONG PIEUP + CHOSEONG HIEUH
531
- - pattern: "ᆸᄁ"
532
- result: "pkk" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGKIYEOK
533
- - pattern: "ᆸᄄ"
534
- result: "ptt" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGTIEUT
535
- - pattern: "ᆸᄈ"
536
- result: "ppp" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGPIEUP
537
- - pattern: "ᆸᄊ"
538
- result: "pss" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGSIOS
539
- - pattern: "ᆸᄍ"
540
- result: "pjj" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGCIEUC
541
- - pattern: "ᆺᄀ"
542
- result: "tk" # HANGUL JONGSEONG SIOS + CHOSEONG KIYEOK
543
- - pattern: "ᆺᄂ"
544
- result: "nn" # HANGUL JONGSEONG SIOS + CHOSEONG NIEUN
545
- - pattern: "ᆺᄃ"
546
- result: "tt" # HANGUL JONGSEONG SIOS + CHOSEONG TIEUT
547
- - pattern: "ᆺᄅ"
548
- result: "nr" # HANGUL JONGSEONG SIOS + CHOSEONG RIEUL # Note 3.1
549
- - pattern: "ᆺᄆ"
550
- result: "nm" # HANGUL JONGSEONG SIOS + CHOSEONG MIEUM
551
- - pattern: "ᆺᄇ"
552
- result: "tp" # HANGUL JONGSEONG SIOS + CHOSEONG PIEUP
553
- - pattern: "ᆺᄉ"
554
- result: "ts" # HANGUL JONGSEONG SIOS + CHOSEONG SIOS
555
- - pattern: "ᆺᄋ"
556
- result: "d" # HANGUL JONGSEONG SIOS + CHOSEONG IEUNG
557
- - pattern: "ᆺᄌ"
558
- result: "tj" # HANGUL JONGSEONG SIOS + CHOSEONG CIEUC
559
- - pattern: "ᆺᄎ"
560
- result: "tch" # HANGUL JONGSEONG SIOS + CHOSEONG CHIEUCH
561
- - pattern: "ᆺᄏ"
562
- result: "tkh" # HANGUL JONGSEONG SIOS + CHOSEONG KHIEUKH
563
- - pattern: "ᆺᄐ"
564
- result: "tth" # HANGUL JONGSEONG SIOS + CHOSEONG THIEUTH
565
- - pattern: "ᆺᄑ"
566
- result: "tph" # HANGUL JONGSEONG SIOS + CHOSEONG PHIEUPH
567
- - pattern: "ᆺᄒ"
568
- result: "th" # HANGUL JONGSEONG SIOS + CHOSEONG HIEUH
569
- - pattern: "ᆺᄁ"
570
- result: "tkk" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGKIYEOK
571
- - pattern: "ᆺᄄ"
572
- result: "ttt" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGTIEUT
573
- - pattern: "ᆺᄈ"
574
- result: "tpp" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGPIEUP
575
- - pattern: "ᆺᄊ"
576
- result: "tss" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGSIOS
577
- - pattern: "ᆺᄍ"
578
- result: "tjj" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGCIEUC
579
- - pattern: "ᆼᄀ"
580
- result: "ngg" # HANGUL JONGSEONG IEUNG + CHOSEONG KIYEOK
581
- - pattern: "ᆼᄂ"
582
- result: "ngn" # HANGUL JONGSEONG IEUNG + CHOSEONG NIEUN
583
- - pattern: "ᆼᄃ"
584
- result: "ngd" # HANGUL JONGSEONG IEUNG + CHOSEONG TIEUT
585
- - pattern: "ᆼᄅ"
586
- result: "ngn" # HANGUL JONGSEONG IEUNG + CHOSEONG RIEUL
587
- - pattern: "ᆼᄆ"
588
- result: "ngm" # HANGUL JONGSEONG IEUNG + CHOSEONG MIEUM
589
- - pattern: "ᆼᄇ"
590
- result: "ngb" # HANGUL JONGSEONG IEUNG + CHOSEONG PIEUP
591
- - pattern: "ᆼᄉ"
592
- result: "ngs" # HANGUL JONGSEONG IEUNG + CHOSEONG SIOS
593
- - pattern: "ᆼᄋ"
594
- result: "ng" # HANGUL JONGSEONG IEUNG + CHOSEONG IEUNG
595
- - pattern: "ᆼᄌ"
596
- result: "ngj" # HANGUL JONGSEONG IEUNG + CHOSEONG CIEUC
597
- - pattern: "ᆼᄎ"
598
- result: "ngch" # HANGUL JONGSEONG IEUNG + CHOSEONG CHIEUCH
599
- - pattern: "ᆼᄏ"
600
- result: "ngkh" # HANGUL JONGSEONG IEUNG + CHOSEONG KHIEUKH
601
- - pattern: "ᆼᄐ"
602
- result: "ngth" # HANGUL JONGSEONG IEUNG + CHOSEONG THIEUTH
603
- - pattern: "ᆼᄑ"
604
- result: "ngph" # HANGUL JONGSEONG IEUNG + CHOSEONG PHIEUPH
605
- - pattern: "ᆼᄒ"
606
- result: "ngh" # HANGUL JONGSEONG IEUNG + CHOSEONG HIEUH
607
- - pattern: "ᆼᄁ"
608
- result: "ngkk" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGKIYEOK
609
- - pattern: "ᆼᄄ"
610
- result: "ngtt" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGTIEUT
611
- - pattern: "ᆼᄈ"
612
- result: "ngpp" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGPIEUP
613
- - pattern: "ᆼᄊ"
614
- result: "ngss" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGSIOS
615
- - pattern: "ᆼᄍ"
616
- result: "ngjj" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGCIEUC
617
- - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄀ"
618
- result: "g" # VOWEL + CHOSEONG KIYEOK # c.f. Note 3.3
619
- - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄂ"
620
- result: "n" # VOWEL + CHOSEONG NIEUN # c.f. Note 3.3
621
- - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄃ"
622
- result: "d" # VOWEL + CHOSEONG TIEUT # c.f. Note 3.3
623
- - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄅ"
624
- result: "r" # VOWEL + CHOSEONG RIEUL
625
- - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄆ"
626
- result: "m" # VOWEL + CHOSEONG MIEUM # c.f. Note 3.3
627
- - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄇ"
628
- result: "b" # VOWEL + CHOSEONG PIEUP # c.f. Note 3.3
629
- - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄉ"
630
- result: "s" # VOWEL + CHOSEONG SIOS
631
- - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄋ"
632
- result: "" # VOWEL + CHOSEONG IEUNG
633
- - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄌ"
634
- result: "j" # VOWEL + CHOSEONG CIEUC
635
- - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄎ"
636
- result: "ch" # VOWEL + CHOSEONG CHIEUCH
637
- - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄏ"
638
- result: "kh" # VOWEL + CHOSEONG KHIEUKH
639
- - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄐ"
640
- result: "th" # VOWEL + CHOSEONG THIEUTH
641
- - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄑ"
642
- result: "ph" # VOWEL + CHOSEONG PHIEUPH
643
- - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄒ"
644
- result: "h" # VOWEL + CHOSEONG HIEUH
645
- - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄁ"
646
- result: "kk" # VOWEL + CHOSEONG SSANGKIYEOK
647
- - pattern: "(?<=[A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄄ"
648
- result: "tt" # VOWEL + CHOSEONG SSANGTIEUT
649
- - pattern: "(?<=[A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄈ"
650
- result: "pp" # VOWEL + CHOSEONG SSANGPIEUP
651
- - pattern: "(?<=[A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄊ"
652
- result: "ss" # VOWEL + CHOSEONG SSANGSIOS
653
- - pattern: "(?<=[A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄍ"
654
- result: "jj" # VOWEL + CHOSEONG SSANGCIEUC
655
- - pattern: "ᆰᄀ"
656
- result: "lg" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG KIYEOK
657
- - pattern: "ᆰᄂ"
658
- result: "ngn" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG NIEUN
659
- - pattern: "ᆰᄃ"
660
- result: "kt" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG TIEUT
661
- - pattern: "ᆰᄅ"
662
- result: "ngn" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG RIEUL
663
- - pattern: "ᆰᄆ"
664
- result: "ngm" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG MIEUM
665
- - pattern: "ᆰᄇ"
666
- result: "kp" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG PIEUP
667
- - pattern: "ᆰᄉ"
668
- result: "ks" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SIOS
669
- - pattern: "ᆰᄋ"
670
- result: "lg" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG IEUNG
671
- - pattern: "ᆰᄌ"
672
- result: "kj" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG CIEUC
673
- - pattern: "ᆰᄎ"
674
- result: "kch" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG CHIEUCH
675
- - pattern: "ᆰᄏ"
676
- result: "lkh" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG KHIEUKH
677
- - pattern: "ᆰᄐ"
678
- result: "kth" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG THIEUTH
679
- - pattern: "ᆰᄑ"
680
- result: "kph" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG PHIEUPH
681
- - pattern: "ᆰᄒ"
682
- result: "lkh" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG HIEUH
683
- - pattern: "ᆰᄁ"
684
- result: "lkk" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGKIYEOK
685
- - pattern: "ᆰᄄ"
686
- result: "ktt" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGTIEUT
687
- - pattern: "ᆰᄈ"
688
- result: "kpp" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGPIEUP
689
- - pattern: "ᆰᄊ"
690
- result: "kss" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGSIOS
691
- - pattern: "ᆰᄍ"
692
- result: "kjj" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGCIEUC
693
- - pattern: "ᆱᄀ"
694
- result: "mg" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG KIYEOK
695
- - pattern: "ᆱᄂ"
696
- result: "mn" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG NIEUN
697
- - pattern: "ᆱᄃ"
698
- result: "md" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG TIEUT
699
- - pattern: "ᆱᄅ"
700
- result: "mr" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG RIEUL
701
- - pattern: "ᆱᄆ"
702
- result: "lm" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG MIEUM
703
- - pattern: "ᆱᄇ"
704
- result: "mb" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG PIEUP
705
- - pattern: "ᆱᄉ"
706
- result: "ms" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SIOS
707
- - pattern: "ᆱᄋ"
708
- result: "lm" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG IEUNG
709
- - pattern: "ᆱᄌ"
710
- result: "mj" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG CIEUC
711
- - pattern: "ᆱᄎ"
712
- result: "mch" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG CHIEUCH
713
- - pattern: "ᆱᄏ"
714
- result: "mkh" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG KHIEUKH
715
- - pattern: "ᆱᄐ"
716
- result: "mth" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG THIEUTH
717
- - pattern: "ᆱᄑ"
718
- result: "mph" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG PHIEUPH
719
- - pattern: "ᆱᄒ"
720
- result: "mh" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG HIEUH
721
- - pattern: "ᆱᄁ"
722
- result: "mkk" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGKIYEOK
723
- - pattern: "ᆱᄄ"
724
- result: "mtt" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGTIEUT
725
- - pattern: "ᆱᄈ"
726
- result: "mpp" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGPIEUP
727
- - pattern: "ᆱᄊ"
728
- result: "mss" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGSIOS
729
- - pattern: "ᆱᄍ"
730
- result: "mjj" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGCIEUC
731
- - pattern: "ᆲᄀ"
732
- result: "pk" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG KIYEOK
733
- - pattern: "ᆲᄂ"
734
- result: "mn" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG NIEUN
735
- - pattern: "ᆲᄃ"
736
- result: "pt" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG TIEUT
737
- - pattern: "ᆲᄅ"
738
- result: "mr" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG RIEUL
739
- - pattern: "ᆲᄆ"
740
- result: "mm" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG MIEUM
741
- - pattern: "ᆲᄇ"
742
- result: "lb" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG PIEUP
743
- - pattern: "ᆲᄉ"
744
- result: "ps" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SIOS
745
- - pattern: "ᆲᄋ"
746
- result: "lb" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG IEUNG
747
- - pattern: "ᆲᄌ"
748
- result: "pj" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG CIEUC
749
- - pattern: "ᆲᄎ"
750
- result: "pch" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG CHIEUCH
751
- - pattern: "ᆲᄏ"
752
- result: "pkh" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG KHIEUKH
753
- - pattern: "ᆲᄐ"
754
- result: "pth" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG THIEUTH
755
- - pattern: "ᆲᄑ"
756
- result: "lph" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG PHIEUPH
757
- - pattern: "ᆲᄒ"
758
- result: "lph" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG HIEUH
759
- - pattern: "ᆲᄁ"
760
- result: "pkk" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGKIYEOK
761
- - pattern: "ᆲᄄ"
762
- result: "ptt" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGTIEUT
763
- - pattern: "ᆲᄈ"
764
- result: "lpp" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGPIEUP
765
- - pattern: "ᆲᄊ"
766
- result: "pss" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGSIOS
767
- - pattern: "ᆲᄍ"
768
- result: "pjj" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGCIEUC
769
- - pattern: "(?<= )ᄀ"
770
- result: "k" # HANGUL CHOSEONG KIYEOK
771
- - pattern: "(?<= )ᄂ"
772
- result: "n" # HANGUL CHOSEONG NIEUN
773
- - pattern: "(?<= )ᄃ"
774
- result: "t" # HANGUL CHOSEONG TIEUT
775
-
776
- # DPRK does not follow the R-onset rule
777
- # - pattern: "(?<= )ᄅ(?=[ᅣᅤᅧᅨᅭᅲ])"
778
- # result: "" # HANGUL CHOSEONG RIEUL # R-onset rule
779
- - pattern: "(?<= )ᄅ"
780
- # result: "n" # HANGUL CHOSEONG RIEUL
781
- result: "r"
782
-
783
- - pattern: "(?<= )ᄆ"
784
- result: "m" # HANGUL CHOSEONG MIEUM
785
- - pattern: "(?<= )ᄇ"
786
- result: "p" # HANGUL CHOSEONG PIEUP
787
- - pattern: "(?<= )ᄉ"
788
- result: "s" # HANGUL CHOSEONG SIOS
789
- - pattern: "(?<= )ᄋ"
790
- result: "" # HANGUL CHOSEONG IEUNG
791
- - pattern: "(?<= )ᄌ"
792
- result: "j" # HANGUL CHOSEONG CIEUC
793
- - pattern: "(?<= )ᄎ"
794
- result: "ch" # HANGUL CHOSEONG CHIEUCH
795
- - pattern: "(?<= )ᄏ"
796
- result: "kh" # HANGUL CHOSEONG KHIEUKH
797
- - pattern: "(?<= )ᄐ"
798
- result: "th" # HANGUL CHOSEONG THIEUTH
799
- - pattern: "(?<= )ᄑ"
800
- result: "ph" # HANGUL CHOSEONG PHIEUPH
801
- - pattern: "(?<= )ᄒ"
802
- result: "h" # HANGUL CHOSEONG HIEUH
803
- - pattern: "(?<= )ᄁ"
804
- result: "kk" # HANGUL CHOSEONG SSANGKIYEOK
805
- - pattern: "(?<= )ᄭ"
806
- result: "kk" # HANGUL CHOSEONG SIOS-KIYEOK
807
- - pattern: "(?<= )ᄄ"
808
- result: "tt" # HANGUL CHOSEONG SSANGTIEUT
809
- - pattern: "(?<= )ᄯ"
810
- result: "tt" # HANGUL CHOSEONG SIOS-TIEUT
811
- - pattern: "(?<= )ᄈ"
812
- result: "pp" # HANGUL CHOSEONG SSANGPIEUP
813
- - pattern: "(?<= )ᄲ"
814
- result: "pp" # HANGUL CHOSEONG SIOS-PIEUP
815
- - pattern: "(?<= )ᄊ"
816
- result: "ss" # HANGUL CHOSEONG SSANGSIOS
817
- - pattern: "(?<= )ᄍ"
818
- result: "jj" # HANGUL CHOSEONG SSANGCIEUC
819
- - pattern: "(?<= )ᄶ"
820
- result: "jj" # HANGUL CHOSEONG SIOS-CIEUC
821
- - pattern: "ᅡ"
822
- result: "a" # HANGUL JUNGSEONG A
823
- - pattern: "ᅣ"
824
- result: "ya" # HANGUL JUNGSEONG YA
825
- - pattern: "ᅥ"
826
- result: "ŏ" # HANGUL JUNGSEONG EO
827
- - pattern: "ᅧ"
828
- result: "yŏ" # HANGUL JUNGSEONG YEO
829
- - pattern: "ᅩ"
830
- result: "o" # HANGUL JUNGSEONG O
831
- - pattern: "ᅭ"
832
- result: "yo" # HANGUL JUNGSEONG YO
833
- - pattern: "ᅮ"
834
- result: "u" # HANGUL JUNGSEONG U
835
- - pattern: "ᅲ"
836
- result: "yu" # HANGUL JUNGSEONG YU
837
- - pattern: "ᅳ"
838
- result: "ü" # HANGUL JUNGSEONG EU
839
- - pattern: "ᅵ"
840
- result: "i" # HANGUL JUNGSEONG I
841
- - pattern: "ᅢ"
842
- result: "ae" # HANGUL JUNGSEONG AE
843
- - pattern: "ᅤ"
844
- result: "yae" # HANGUL JUNGSEONG YAE
845
- - pattern: "ᅦ"
846
- result: "e" # HANGUL JUNGSEONG E
847
- - pattern: "ᅨ"
848
- result: "ye" # HANGUL JUNGSEONG YE
849
- - pattern: "ᅬ"
850
- result: "oe" # HANGUL JUNGSEONG OE
851
- - pattern: "ᅱ"
852
- result: "wi" # HANGUL JUNGSEONG WI
853
- - pattern: "ᅴ"
854
- result: "üi" # HANGUL JUNGSEONG YI
855
- - pattern: "ᅪ"
856
- result: "wa" # HANGUL JUNGSEONG WA
857
- - pattern: "ᅯ"
858
- result: "wo" # HANGUL JUNGSEONG WEO
859
- - pattern: "ᅫ"
860
- result: "wae" # HANGUL JUNGSEONG WAE
861
- - pattern: "ᅰ"
862
- result: "we" # HANGUL JUNGSEONG WE
863
- - pattern: "ᆨ(?=[ A-Za-z0-9-])"
864
- result: "k" # HANGUL JONGSEONG KIYEOK
865
- - pattern: "ᆫ(?=[ A-Za-z0-9-])"
866
- result: "n" # HANGUL JONGSEONG NIEUN
867
- - pattern: "ᆮ(?=[ A-Za-z0-9-])"
868
- result: "t" # HANGUL JONGSEONG TIEUT
869
- - pattern: "ᆯ(?=[ A-Za-z0-9-])"
870
- result: "l" # HANGUL JONGSEONG RIEUL
871
- - pattern: "ᆷ(?=[ A-Za-z0-9-])"
872
- result: "m" # HANGUL JONGSEONG MIEUM
873
- - pattern: "ᆸ(?=[ A-Za-z0-9-])"
874
- result: "p" # HANGUL JONGSEONG PIEUP
875
- - pattern: "ᆺ(?=[ A-Za-z0-9-])"
876
- result: "t" # HANGUL JONGSEONG SIOS
877
- - pattern: "ᆼ(?=[ A-Za-z0-9-])"
878
- result: "ng" # HANGUL JONGSEONG IEUNG
879
- - pattern: "ᆽ(?=[ A-Za-z0-9-])"
880
- result: "t" # HANGUL JONGSEONG CIEUC
881
- - pattern: "ᆾ(?=[ A-Za-z0-9-])"
882
- result: "t" # HANGUL JONGSEONG CHIEUCH
883
- - pattern: "ᆿ(?=[ A-Za-z0-9-])"
884
- result: "k" # HANGUL JONGSEONG KHIEUKH
885
- - pattern: "ᇀ(?=[ A-Za-z0-9-])"
886
- result: "t" # HANGUL JONGSEONG THIEUTH
887
- - pattern: "ᇁ(?=[ A-Za-z0-9-])"
888
- result: "p" # HANGUL JONGSEONG PHIEUPH
889
- - pattern: "ᆰ(?=[ A-Za-z0-9-])"
890
- result: "k" # HANGUL JONGSEONG RIEUL-KIYEOK
891
- - pattern: "ᆲ(?=[ A-Za-z0-9-])"
892
- result: "p" # HANGUL JONGSEONG RIEUL-PIEUP
893
-
894
- # Remove the leading and trailing space that was added
895
- - pattern: "^ "
896
- result: ""
897
- - pattern: " $"
898
- result: ""
899
-
900
- characters:
901
- # This is based on Jamo