interscript 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +246 -14
  3. data/bin/interscript +38 -17
  4. data/bin/setup +8 -0
  5. data/lib/g2pwrapper.py +34 -0
  6. data/lib/interscript.rb +140 -16
  7. data/lib/interscript/command.rb +27 -0
  8. data/lib/interscript/mapping.rb +125 -0
  9. data/lib/interscript/version.rb +1 -1
  10. data/lib/model-7 +0 -0
  11. data/lib/tha-pt-b-7 +0 -0
  12. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  13. data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
  14. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  15. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  16. data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
  17. data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
  18. data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
  19. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  20. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  21. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  22. data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
  23. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  24. data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
  25. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  26. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  27. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  28. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
  29. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
  30. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
  31. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  32. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  33. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  34. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  35. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
  36. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  37. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
  38. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  39. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
  40. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
  41. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
  42. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
  43. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  44. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  45. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
  46. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  47. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  48. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  49. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  50. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  51. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
  52. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  53. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
  54. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  55. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  56. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  57. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
  58. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
  59. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
  60. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
  61. data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
  62. data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
  63. data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
  64. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
  65. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  66. data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
  67. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
  68. data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
  69. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
  70. data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
  71. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
  72. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
  73. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
  74. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
  75. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
  76. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  77. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
  78. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  79. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  80. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  81. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  82. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  83. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  84. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  85. data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
  86. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  87. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  88. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  89. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  90. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  91. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
  92. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  93. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  94. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  95. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  96. data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
  97. data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
  98. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  99. data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
  100. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  101. data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
  102. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  103. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  104. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  105. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  106. data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
  107. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  108. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  109. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  110. data/spec/interscript/mapping_spec.rb +42 -0
  111. data/spec/interscript_spec.rb +20 -5
  112. data/spec/spec_helper.rb +3 -1
  113. metadata +149 -24
  114. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  115. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  116. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  117. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  118. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  119. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  120. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
@@ -0,0 +1,41 @@
1
+ ---
2
+ authority_id: iso
3
+ id: 1997
4
+ language: ell
5
+ source_script: Grek
6
+ destination_script: Latn
7
+ name: ISO 843:1997
8
+ url:
9
+ creation_date: 1997
10
+ description: |
11
+ ISO Transcription table for Greek
12
+
13
+ note:
14
+ - Transliteration of Greek into Latin: Type 2, Clause 3 Table 2
15
+ - Introduced casing to digamma, yot, and lunate sigma. (Casing was a late introduction to character sets for those characters)
16
+
17
+ tests:
18
+
19
+ - source: |
20
+ Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί, και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι· όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ. Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος. Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»· όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ». Και εις το εξής να μάθομεν γνώση, αν θέλομεν να φκιάσομεν χωριόν, να ζήσομεν όλοι μαζί.
21
+
22
+ Γιάννης Μακρυγιάννης.
23
+
24
+ expected: |
25
+ Éna práma mónon me parakínise ki eména na grápso óti toútin tin patrída tin échomen óloi mazí, kai sofoí ki amatheís kai ploúsioi kai ftochoí kai politikoí kai stratiotikoí kai oi pléon mikróteroi ánthropoi; ósoi agonistíkamen, analógos o katheís, échomen na zísomen edó. To loipón doulépsamen óloi mazí, na tin fylámen ki óloi mazí kai na min légei oúte o dynatós «egó» oúte o adýnatos. Xérete póte na légei o katheís «egó»? Ótan agonisteí mónos tou kai fkiásei í chalásei, na légei «egó»; ótan ómos agonízontai polloí kai fkiánoun, tóte na léne «emeís». Eímaste eis to «emeís» ki óchi eis to «egó». Kai eis to exís na máthomen gnósi, an thélomen na fkiásomen chorión, na zísomen óloi mazí.
26
+
27
+ Giánnis Makrygiánnis.
28
+
29
+ map:
30
+ character_separator: ""
31
+ word_separator: " "
32
+ inherit: "elot-ell-Grek-Latn-743-1982-ts"
33
+
34
+ characters:
35
+ "\u03DC": "W" # Ϝ
36
+ "\u03DD": "w" # ϝ
37
+ "\u03F2": "s" # ϲ
38
+ "\u03F9": "S" # Ϲ
39
+ "\u03F3": "j"
40
+ "\u037F": "j"
41
+
@@ -0,0 +1,62 @@
1
+ ---
2
+ authority_id: iso
3
+ id: 3602-1989
4
+ language: jpn
5
+ source_script: Hrkt
6
+ destination_script: Latn
7
+ name: ISO 3602 Romanization of Japanese (Kana Script)
8
+ url:
9
+ creation_date:
10
+ adoption_date:
11
+ description:
12
+
13
+ notes:
14
+
15
+ tests:
16
+ - source: かんおう
17
+ expected: kan’ô
18
+ - source: かのう
19
+ expected: kanô
20
+ - source: きんゆう
21
+ expected: kin’yû
22
+ - source: とうきょう
23
+ expected: tôkyô
24
+ - source: がっ•こう
25
+ expected: gakkô
26
+ - source: かごっま
27
+ expected: kagomma
28
+ - source: ぽっぽっや
29
+ expected: poppoyya
30
+ - source: てっら
31
+ expected: terra
32
+ - source: にゃっほー
33
+ expected: nyahhô
34
+ - source: ゴッホ
35
+ expected: gohho
36
+ - source: おも•う
37
+ expected: omou
38
+ - source: こうし
39
+ expected: kôsi
40
+ - source: こう•し #格子
41
+ expected: kôsi
42
+ - source: こ•うし #子牛
43
+ expected: kousi
44
+ - source: ぎゃあ
45
+ expected: gyâ
46
+
47
+ map:
48
+ inherit: mext-jpn-Hrkt-Latn-1954
49
+
50
+ rules:
51
+ # Remove morpheme boundary marker after sokuon っ/ッ
52
+ - pattern: "([っッ])•"
53
+ result: "\\1"
54
+
55
+ postrules:
56
+ # Remove morpheme boundary marker
57
+ - pattern: "•"
58
+ result: ""
59
+
60
+ # Use ’ instead of '
61
+ - pattern: "'"
62
+ result: "’"
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  authority_id: iso
3
- id: iso9
3
+ id: 9-1995
4
4
  language: rus
5
5
  source_script: Cyrl
6
6
  destination_script: Latn
@@ -14,8 +14,7 @@ description: |
14
14
  Cyrillic alphabetic order, the 118 single or diacritic-carrying
15
15
  characters that appear in one or another of the considered alphabets.
16
16
  tests:
17
- - source:
18
- expected:
17
+
19
18
 
20
19
  map:
21
20
  characters:
@@ -0,0 +1,109 @@
1
+ ---
2
+ authority_id: iso
3
+ id: 11940-1998
4
+ language: tha
5
+ source_script: Thai
6
+ destination_script: Latn
7
+ name: ISO 11940:1998 Information and documentation -- Transliteration of Thai
8
+ url: https://www.iso.org/standard/20574.html
9
+ creation_date: 1998
10
+ adoption_date:
11
+ description:
12
+
13
+ notes:
14
+
15
+ tests:
16
+ - source: 'ภาษาไทย'
17
+ expected: 'p̣hās̛̄āịthy'
18
+ - source: 'เชียงใหม่'
19
+ expected: 'echīyngıh̄m̀'
20
+
21
+ map:
22
+
23
+ characters:
24
+ '\u0e01': 'k' # ก THAI CHARACTER KO KAI
25
+ '\u0e02': 'k̄h' # ข THAI CHARACTER KHO KHAI
26
+ '\u0e03': 'ḳ̄h' # ฃ THAI CHARACTER KHO KHUAT
27
+ '\u0e04': 'kh' # ค THAI CHARACTER KHO KHWAI
28
+ '\u0e05': 'k̛h' # ฅ THAI CHARACTER KHO KHON
29
+ '\u0e06': 'ḳh' # ฆ THAI CHARACTER KHO RAKHANG
30
+ '\u0e07': 'ng' # ง THAI CHARACTER NGO NGU
31
+ '\u0e08': 'c' # จ THAI CHARACTER CHO CHAN
32
+ '\u0e09': 'c̄h' # ฉ THAI CHARACTER CHO CHING
33
+ '\u0e0a': 'ch' # ช THAI CHARACTER CHO CHANG
34
+ '\u0e0b': 's' # ซ THAI CHARACTER SO SO
35
+ '\u0e0c': 'c̣h' # ฌ THAI CHARACTER CHO CHOE
36
+ '\u0e0d': 'ỵ' # ญ THAI CHARACTER YO YING
37
+ '\u0e0e': 'ḍ' # ฎ THAI CHARACTER DO CHADA
38
+ '\u0e0f': 'ṭ' # ฏ THAI CHARACTER TO PATAK
39
+ '\u0e10': 'ṭ̄h' # ฐ THAI CHARACTER THO THAN
40
+ '\u0e11': 'ṯh' # ฑ THAI CHARACTER THO NANGMONTHO
41
+ '\u0e12': 't̛h' # ฒ THAI CHARACTER THO PHUTHAO
42
+ '\u0e13': 'ṇ' # ณ THAI CHARACTER NO NEN
43
+ '\u0e14': 'd' # ด THAI CHARACTER DO DEK
44
+ '\u0e15': 't' # ต THAI CHARACTER TO TAO
45
+ '\u0e16': 't̄h' # ถ THAI CHARACTER THO THUNG
46
+ '\u0e17': 'th' # ท THAI CHARACTER THO THAHAN
47
+ '\u0e18': 'ṭh' # ธ THAI CHARACTER THO THONG
48
+ '\u0e19': 'n' # น THAI CHARACTER NO NU
49
+ '\u0e1a': 'b' # บ THAI CHARACTER BO BAIMAI
50
+ '\u0e1b': 'p' # ป THAI CHARACTER PO PLA
51
+ '\u0e1c': 'p̄h' # ผ THAI CHARACTER PHO PHUNG
52
+ '\u0e1d': 'f̄' # ฝ THAI CHARACTER FO FA
53
+ '\u0e1e': 'ph' # พ THAI CHARACTER PHO PHAN
54
+ '\u0e1f': 'f' # ฟ THAI CHARACTER FO FAN
55
+ '\u0e20': 'p̣h' # ภ THAI CHARACTER PHO SAMPHAO
56
+ '\u0e21': 'm' # ม THAI CHARACTER MO MA
57
+ '\u0e22': 'y' # ย THAI CHARACTER YO YAK
58
+ '\u0e23': 'r' # ร THAI CHARACTER RO RUA
59
+ '\u0e24': 'v' # ฤ THAI CHARACTER RU
60
+ '\u0e25': 'l' # ล THAI CHARACTER LO LING
61
+ '\u0e26': 'ł' # ฦ THAI CHARACTER LU
62
+ '\u0e27': 'w' # ว THAI CHARACTER WO WAEN
63
+ '\u0e28': 'ṣ̄' # ศ THAI CHARACTER SO SALA
64
+ '\u0e29': 's̛̄' # ษ THAI CHARACTER SO RUSI
65
+ '\u0e2a': 's̄' # ส THAI CHARACTER SO SUA
66
+ '\u0e2b': 'h̄' # ห THAI CHARACTER HO HIP
67
+ '\u0e2c': 'ḷ' # ฬ THAI CHARACTER LO CHULA
68
+ '\u0e2d': 'x' # อ THAI CHARACTER O ANG
69
+ '\u0e2e': 'ḥ' # ฮ THAI CHARACTER HO NOKHUK
70
+ '\u0e2f': 'ǂ' # ฯ THAI CHARACTER PAIYANNOI
71
+ '\u0e30': 'a' # ะ THAI CHARACTER SARA A
72
+ '\u0e31': 'ạ' # ั THAI CHARACTER MAI HAN-AKAT
73
+ '\u0e32': 'ā' # า THAI CHARACTER SARA AA
74
+ '\u0e33': 'å' # ำ THAI CHARACTER SARA AM
75
+ '\u0e34': 'i' # ิ THAI CHARACTER SARA I
76
+ '\u0e35': 'ī' # ี THAI CHARACTER SARA II
77
+ '\u0e36': 'ụ' # ึ THAI CHARACTER SARA UE
78
+ '\u0e37': 'ụ̄' # ื THAI CHARACTER SARA UEE
79
+ '\u0e38': 'u' # ุ THAI CHARACTER SARA U
80
+ '\u0e39': 'ū' # ู THAI CHARACTER SARA UU
81
+ '\u0e3a': '–̥' # ฺ THAI CHARACTER PHINTHU
82
+ '\u0e40': 'e' # เ THAI CHARACTER SARA E
83
+ '\u0e41': 'æ' # แ THAI CHARACTER SARA AE
84
+ '\u0e42': 'o' # โ THAI CHARACTER SARA O
85
+ '\u0e43': 'ı' # ใ THAI CHARACTER SARA AI MAIMUAN
86
+ '\u0e44': 'ị' # ไ THAI CHARACTER SARA AI MAIMALAI
87
+ '\u0e45': 'ɨ' # ๅ THAI CHARACTER LAKKHANGYAO
88
+ '\u0e46': '«' # ๆ THAI CHARACTER MAIYAMOK
89
+ '\u0e47': '̆' # ็ THAI CHARACTER MAITAIKHU
90
+ '\u0e48': '̀' # ่ THAI CHARACTER MAI EK
91
+ '\u0e49': '̂' # ้ THAI CHARACTER MAI THO
92
+ '\u0e4a': '́' # ๊ THAI CHARACTER MAI TRI
93
+ '\u0e4b': '̌' # ๋ THAI CHARACTER MAI CHATTAWA
94
+ '\u0e4c': '̒' # ์ THAI CHARACTER THANTHAKHAT
95
+ '\u0e4d': '̊' # ํ THAI CHARACTER NIKHAHIT
96
+ '\u0e4e': '~' # ๎ THAI CHARACTER YAMAKKAN
97
+ '\u0e4f': '§' # ๏ THAI CHARACTER FONGMAN
98
+ '\u0e50': '0' # ๐ THAI DIGIT ZERO
99
+ '\u0e51': '1' # ๑ THAI DIGIT ONE
100
+ '\u0e52': '2' # ๒ THAI DIGIT TWO
101
+ '\u0e53': '3' # ๓ THAI DIGIT THREE
102
+ '\u0e54': '4' # ๔ THAI DIGIT FOUR
103
+ '\u0e55': '5' # ๕ THAI DIGIT FIVE
104
+ '\u0e56': '6' # ๖ THAI DIGIT SIX
105
+ '\u0e57': '7' # ๗ THAI DIGIT SEVEN
106
+ '\u0e58': '8' # ๘ THAI DIGIT EIGHT
107
+ '\u0e59': '9' # ๙ THAI DIGIT NINE
108
+ '\u0e5a': 'ǁ' # ๚ THAI CHARACTER ANGKHANKHU
109
+ '\u0e5b': '»' # ๛ THAI CHARACTER KHOMUT
@@ -0,0 +1,901 @@
1
+ ---
2
+ authority_id: kp
3
+ id: 2002
4
+ language: kor
5
+ source_script: Hang
6
+ destination_script: Latn
7
+ name: Korean Democratic People's Republic of Korea Korean System (2002)
8
+ url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/8th-uncsgn-docs/inf/8th_UNCSGN_econf.94_INF.72.pdf
9
+ creation_date:
10
+ adoption_date:
11
+ description:
12
+
13
+ notes:
14
+
15
+ - Here is a list of features that are listed in the guideline but
16
+ not implemented in this map.
17
+
18
+ - Note 3.2
19
+ The combination n+r is romanized as -ll- only when it is "considered
20
+ to be longstanding". In this implementation, all n+r will be romanized as
21
+ -ll- for the sake of simplicity.
22
+
23
+ - Note 3.3
24
+ Sai-siot (Connective ㅅ) is not written out in DPRK Korean, but it is
25
+ supposed to be romanized. Sai-siot is not predictable.
26
+ This has not been implemented.
27
+
28
+ - Note 4.1
29
+ Hyphen "may be inserted in case of a possible confusion in pronunciation".
30
+ Except for the n-g combination, this has not been implemented.
31
+
32
+ - Note 4.4
33
+ Geographical names "may be transliterated or translated". In this map,
34
+ all names will be transliterated, not translated. Numerals will not be
35
+ transliterated.
36
+
37
+ - Note 4.5
38
+ Spacing rule for personal names has not been implemented.
39
+
40
+ - Note 4.7
41
+ Optional omission of diacritics and optional simplification of
42
+ KK, TT, PP, SS, JJ to single letter have not been implemented.
43
+
44
+ tests:
45
+ # Note1.5
46
+ - source: "우리산"
47
+ expected: "Urisan"
48
+
49
+ # Note2.1
50
+ - source: "교구동"
51
+ expected: "Kyogu-dong"
52
+ - source: "초도"
53
+ expected: "Chodo"
54
+ - source: "고비리"
55
+ expected: "Kobi-ri"
56
+ - source: "강동"
57
+ expected: "Kangdong"
58
+ - source: "금교"
59
+ expected: "Kümgyo"
60
+ - source: "칠보산"
61
+ expected: "Chilbosan"
62
+
63
+ # Note2.2
64
+ - source: "곡산"
65
+ expected: "Koksan"
66
+ - source: "갑산"
67
+ expected: "Kapsan"
68
+ - source: "앞산"
69
+ expected: "Apsan"
70
+ - source: "삿갓봉"
71
+ expected: "Satkatbong"
72
+
73
+ # Note2.3
74
+ - source: "울산"
75
+ expected: "Ulsan"
76
+ # - source: "은률"
77
+ # expected: "Ünryul" # This is an exception to note 3.1
78
+
79
+ # Note2.4
80
+ - source: "닭섬"
81
+ expected: "Taksŏm"
82
+ - source: "물곬"
83
+ expected: "Mulkol"
84
+ - source: "붉은바위"
85
+ expected: "Pulgünbawi"
86
+ - source: "앉은바위"
87
+ expected: "Anjünbawi"
88
+
89
+ # Note3.1
90
+ - source: "백마산"
91
+ expected: "Paengmasan"
92
+ - source: "꽃마을"
93
+ expected: "Kkonmaül"
94
+ - source: "압록강"
95
+ expected: "Amrokgang"
96
+
97
+ # Note3.2
98
+ - source: "천리마"
99
+ expected: "Chŏllima"
100
+ # - source: "한나산" # Typo in the original document
101
+ - source: "한라산"
102
+ expected: "Hallasan"
103
+ - source: "전라도"
104
+ expected: "Jŏlla-do"
105
+
106
+ # Note3.3
107
+
108
+ # - source: "기대산" # ROK: 깃대산
109
+ # expected: "Kittaesan"
110
+ # - source: "새별읍" # ROK: 샛별
111
+ # expected: "Saeppyŏl-üp" # hyphen
112
+ # - source: "뒤문" # ROK: 뒷문
113
+ # expected: "Twinmun"
114
+
115
+ # Note4.1 - Separator (OPTIONAL)
116
+
117
+ - source: "앞-언덕"
118
+ expected: "Ap-ŏndŏk"
119
+ - source: "부억-안골"
120
+ expected: "Puŏk-angol"
121
+ - source: "판교"
122
+ expected: "Phan-gyo"
123
+ # - source: "방어동"
124
+ # expected: "Pang-ŏ-dong"
125
+
126
+ # Note4.2
127
+ - source: "평안남도 평성시"
128
+ expected: "Phyŏngannam-do Phyŏngsŏng-si"
129
+
130
+ # Note4.3
131
+ - source: "3.1동"
132
+ expected: "3.1-dong"
133
+
134
+ # Note4.6
135
+ - source: "평양"
136
+ expected: "Pyongyang"
137
+
138
+ map:
139
+ character_separator: ""
140
+ word_separator: " "
141
+ title_case: True
142
+ inherit: "nil-kor-Hang-Hang-jamo"
143
+
144
+ rules:
145
+
146
+ # This system does not require transliteration of numerals
147
+ # convert numbers to space + Hangul
148
+ # - pattern: "([^0-9 ])(?=[0-9])"
149
+ # result: "\\1 "
150
+ # - pattern: "1"
151
+ # result: "일"
152
+ # - pattern: "2"
153
+ # result: "이"
154
+ # - pattern: "3"
155
+ # result: "삼"
156
+ # - pattern: "4"
157
+ # result: "사"
158
+ # - pattern: "5"
159
+ # result: "오"
160
+ # - pattern: "6"
161
+ # result: "육"
162
+ # - pattern: "7"
163
+ # result: "칠"
164
+ # - pattern: "8"
165
+ # result: "팔"
166
+ # - pattern: "9"
167
+ # result: "구"
168
+
169
+ # Use voiced onset for geographical features
170
+ # Note 4.3.1
171
+ - pattern: "(?<=..)산( |$)"
172
+ result: "san\\1"
173
+ - pattern: "(?<=..)거리( |$)"
174
+ result: "gŏri\\1"
175
+ - pattern: "(?<=..)고개( |$)"
176
+ result: "gogae\\1"
177
+ - pattern: "(?<=..)대( |$)"
178
+ result: "dae\\1"
179
+ - pattern: "(?<=..)봉( |$)"
180
+ result: "bong\\1"
181
+ - pattern: "(?<=..)교( |$)"
182
+ result: "gyo\\1"
183
+ - pattern: "(?<=..)골( |$)"
184
+ result: "gol\\1"
185
+ - pattern: "(?<=..)각( |$)"
186
+ result: "gak\\1"
187
+ - pattern: "(?<=..)벌( |$)"
188
+ result: "bŏl\\1"
189
+ - pattern: "(?<=..)관( |$)"
190
+ result: "gwan\\1"
191
+ - pattern: "(?<=..)곶( |$)"
192
+ result: "got\\1"
193
+ - pattern: "(?<=..)강( |$)"
194
+ result: "gang\\1"
195
+
196
+ # add hyphen in front of generics
197
+ # Only add hyphen if the name is three syllables or longer
198
+ - pattern: "(?<=..)도( |$)"
199
+ result: "-do\\1"
200
+ - pattern: "(?<=..)시( |$)"
201
+ result: "-si\\1"
202
+ - pattern: "(?<=..)군( |$)"
203
+ result: "-gun\\1"
204
+ - pattern: "(?<=..)면( |$)"
205
+ result: "-myŏn\\1"
206
+ - pattern: "(?<=..)리( |$)"
207
+ result: "-ri\\1"
208
+ - pattern: "(?<=..)동( |$)"
209
+ result: "-dong\\1"
210
+ - pattern: "(?<=..)구( |$)"
211
+ result: "-gu\\1"
212
+ - pattern: "(?<=..)구역( |$)"
213
+ result: "-guyŏk\\1"
214
+
215
+ # The name Pyongyang will be an exception
216
+ # Not Phyŏngyang
217
+
218
+ - pattern: "평양"
219
+ result: "Pyongyang"
220
+
221
+ postrules:
222
+
223
+ # Add space to the two ends of the string for easier word boundary handling
224
+ - pattern: "^"
225
+ result: " "
226
+ - pattern: "$"
227
+ result: " "
228
+
229
+ # HANGUL JONGSEONG SSANGKIYEOK
230
+ - pattern: "ᆩᄋ"
231
+ result: "ᆨᄁ"
232
+ - pattern: "ᆩ"
233
+ result: "ᆨ"
234
+
235
+ # HANGUL JONGSEONG KIYEOK-SIOS
236
+ - pattern: "ᆪᄋ"
237
+ result: "ᆨᄉ"
238
+ - pattern: "ᆪ"
239
+ result: "ᆨ"
240
+
241
+ # HANGUL JONGSEONG NIEUN-CIEUC
242
+ - pattern: "ᆬᄋ"
243
+ result: "ᆫᄌ"
244
+ - pattern: "ᆬ"
245
+ result: "ᆫ"
246
+
247
+ # HANGUL JONGSEONG NIEUN-HIEUH
248
+ - pattern: "ᆭᄀ"
249
+ result: "ᆫᄏ"
250
+ - pattern: "ᆭᄃ"
251
+ result: "ᆫᄐ"
252
+ - pattern: "ᆭᄇ"
253
+ result: "ᆫᄑ"
254
+ - pattern: "ᆭᄌ"
255
+ result: "ᆫᄎ"
256
+ - pattern: "ᆭ"
257
+ result: "ᆫ"
258
+
259
+ # HANGUL JONGSEONG TIEUT
260
+ - pattern: "ᆮ(?=[ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄌᄍᄎᄏᄐᄑᄒ])"
261
+ result: "ᆺ"
262
+
263
+ # HANGUL JONGSEONG RIEUL-SIOS
264
+ - pattern: "ᆳᄋ"
265
+ result: "ᆯᄉ"
266
+ - pattern: "ᆳ"
267
+ result: "ᆯ"
268
+
269
+ # HANGUL JONGSEONG RIEUL-THIEUTH
270
+ - pattern: "ᆴᄋ"
271
+ result: "ᆯᄐ"
272
+ - pattern: "ᆴ"
273
+ result: "ᆯ"
274
+
275
+ # HANGUL JONGSEONG RIEUL-PHIEUPH
276
+ - pattern: "ᆵᄋ"
277
+ result: "ᆯᄑ"
278
+ - pattern: "ᆵ(?=[ᄃᄄᄐ])"
279
+ result: "ᆯ"
280
+ - pattern: "ᆵ"
281
+ result: "ᄇ"
282
+
283
+ # HANGUL JONGSEONG RIEUL-HIEUH
284
+ - pattern: "ᆶᄀ"
285
+ result: "ᆯᄏ"
286
+ - pattern: "ᆶᄃ"
287
+ result: "ᆯᄐ"
288
+ - pattern: "ᆶᄇ"
289
+ result: "ᆯᄑ"
290
+ - pattern: "ᆶᄌ"
291
+ result: "ᆯᄎ"
292
+ - pattern: "ᆶ"
293
+ result: "ᆯ"
294
+
295
+ # HANGUL JONGSEONG PIEUP-SIOS
296
+ - pattern: "ᆹᄋ"
297
+ result: "ᄇᄉ"
298
+ - pattern: "ᆹ"
299
+ result: "ᄇ"
300
+
301
+ # HANGUL JONGSEONG SSANG-SIOS
302
+ - pattern: "ᆻᄋ"
303
+ result: "ᆺᄊ"
304
+ - pattern: "ᆻ"
305
+ result: "ᆺ"
306
+
307
+ # HANGUL JONGSEONG CIEUC
308
+ - pattern: "ᆽᄋ"
309
+ result: "ᆺᄌ"
310
+ - pattern: "ᆽ"
311
+ result: "ᆺ"
312
+
313
+ # HANGUL JONGSEONG CHIEUCH
314
+ - pattern: "ᆾᄋ"
315
+ result: "ᆺᄎ"
316
+ - pattern: "ᆾ"
317
+ result: "ᆺ"
318
+
319
+ # HANGUL JONGSEONG KHIEUKH
320
+ - pattern: "ᆿᄋ"
321
+ result: "ᆨᄏ"
322
+ - pattern: "ᆿ"
323
+ result: "ᆨ"
324
+
325
+ # HANGUL JONGSEONG THIEUTH
326
+ - pattern: "ᇀᄋ"
327
+ result: "ᆺᄐ"
328
+ - pattern: "ᇀ"
329
+ result: "ᆺ"
330
+
331
+ # HANGUL JONGSEONG PHIEUPH
332
+ - pattern: "ᇁᄋ"
333
+ result: "ᆸᄑ"
334
+ - pattern: "ᇁ"
335
+ result: "ᆸ"
336
+
337
+ # HANGUL JONGSEONG HIEUH
338
+ - pattern: "ᇂᄀ"
339
+ result: "ᄏ"
340
+ - pattern: "ᇂᄃ"
341
+ result: "ᄐ"
342
+ - pattern: "ᇂᄇ"
343
+ result: "ᄑ"
344
+ - pattern: "ᇂᄌ"
345
+ result: "ᄎ"
346
+ - pattern: "ᇂ"
347
+ result: ""
348
+
349
+ # From Unicode Chart
350
+ # https://github.com/unicode-org/cldr/blob/master/common/transforms/Korean-Latin-BGN.xml
351
+ - pattern: "ᆨᄀ"
352
+ result: "kk" # HANGUL JONGSEONG KIYEOK + CHOSEONG KIYEOK
353
+ - pattern: "ᆨᄂ"
354
+ result: "ngn" # HANGUL JONGSEONG KIYEOK + CHOSEONG NIEUN
355
+ - pattern: "ᆨᄃ"
356
+ result: "kt" # HANGUL JONGSEONG KIYEOK + CHOSEONG TIEUT
357
+ - pattern: "ᆨᄅ"
358
+ result: "ngn" # HANGUL JONGSEONG KIYEOK + CHOSEONG RIEUL
359
+ - pattern: "ᆨᄆ"
360
+ result: "ngm" # HANGUL JONGSEONG KIYEOK + CHOSEONG MIEUM
361
+ - pattern: "ᆨᄇ"
362
+ result: "kp" # HANGUL JONGSEONG KIYEOK + CHOSEONG PIEUP
363
+ - pattern: "ᆨᄉ"
364
+ result: "ks" # HANGUL JONGSEONG KIYEOK + CHOSEONG SIOS
365
+ - pattern: "ᆨᄋ"
366
+ result: "g" # HANGUL JONGSEONG KIYEOK + CHOSEONG IEUNG
367
+ - pattern: "ᆨᄌ"
368
+ result: "kj" # HANGUL JONGSEONG KIYEOK + CHOSEONG CIEUC
369
+ - pattern: "ᆨᄎ"
370
+ result: "kch" # HANGUL JONGSEONG KIYEOK + CHOSEONG CHIEUCH
371
+ - pattern: "ᆨᄏ"
372
+ result: "kkh" # HANGUL JONGSEONG KIYEOK + CHOSEONG KHIEUKH # NOTE: the dash is always skipped
373
+ - pattern: "ᆨᄐ"
374
+ result: "kth" # HANGUL JONGSEONG KIYEOK + CHOSEONG THIEUTH
375
+ - pattern: "ᆨᄑ"
376
+ result: "kp" # HANGUL JONGSEONG KIYEOK + CHOSEONG PHIEUPH
377
+ - pattern: "ᆨᄒ"
378
+ result: "kh" # HANGUL JONGSEONG KIYEOK + CHOSEONG HIEUH
379
+ - pattern: "ᆨᄁ"
380
+ result: "kkk" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGKIYEOK
381
+ - pattern: "ᆨᄄ"
382
+ result: "ktt" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGTIEUT
383
+ - pattern: "ᆨᄈ"
384
+ result: "kpp" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGPIEUP
385
+ - pattern: "ᆨᄊ"
386
+ result: "kss" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGSIOS
387
+ - pattern: "ᆨᄍ"
388
+ result: "kjj" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGCIEUC
389
+ - pattern: "ᆫᄀ"
390
+ result: "n-g" # HANGUL JONGSEONG NIEUN + CHOSEONG KIYEOK
391
+ - pattern: "ᆫᄂ"
392
+ result: "nn" # HANGUL JONGSEONG NIEUN + CHOSEONG NIEUN
393
+ - pattern: "ᆫᄃ"
394
+ result: "nd" # HANGUL JONGSEONG NIEUN + CHOSEONG TIEUT
395
+ - pattern: "ᆫᄅ"
396
+ result: "ll" # HANGUL JONGSEONG NIEUN + CHOSEONG RIEUL
397
+ - pattern: "ᆫᄆ"
398
+ result: "nm" # HANGUL JONGSEONG NIEUN + CHOSEONG MIEUM
399
+ - pattern: "ᆫᄇ"
400
+ result: "nb" # HANGUL JONGSEONG NIEUN + CHOSEONG PIEUP
401
+ - pattern: "ᆫᄉ"
402
+ result: "ns" # HANGUL JONGSEONG NIEUN + CHOSEONG SIOS
403
+ - pattern: "ᆫᄋ"
404
+ result: "n" # HANGUL JONGSEONG NIEUN + CHOSEONG IEUNG
405
+ - pattern: "ᆫᄌ"
406
+ result: "nj" # HANGUL JONGSEONG NIEUN + CHOSEONG CIEUC
407
+ - pattern: "ᆫᄎ"
408
+ result: "nch" # HANGUL JONGSEONG NIEUN + CHOSEONG CHIEUCH
409
+ - pattern: "ᆫᄏ"
410
+ result: "nkh" # HANGUL JONGSEONG NIEUN + CHOSEONG KHIEUKH
411
+ - pattern: "ᆫᄐ"
412
+ result: "nth" # HANGUL JONGSEONG NIEUN + CHOSEONG THIEUTH
413
+ - pattern: "ᆫᄑ"
414
+ result: "nph" # HANGUL JONGSEONG NIEUN + CHOSEONG PHIEUPH
415
+ - pattern: "ᆫᄒ"
416
+ result: "nh" # HANGUL JONGSEONG NIEUN + CHOSEONG HIEUH
417
+ - pattern: "ᆫᄁ"
418
+ result: "nkk" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGKIYEOK
419
+ - pattern: "ᆫᄄ"
420
+ result: "ntt" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGTIEUT
421
+ - pattern: "ᆫᄈ"
422
+ result: "npp" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGPIEUP
423
+ - pattern: "ᆫᄊ"
424
+ result: "nss" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGSIOS
425
+ - pattern: "ᆫᄍ"
426
+ result: "njj" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGCIEUC
427
+ - pattern: "ᆯᄀ"
428
+ result: "lk" # HANGUL JONGSEONG RIEUL + CHOSEONG KIYEOK
429
+ - pattern: "ᆯᄂ"
430
+ result: "ll" # HANGUL JONGSEONG RIEUL + CHOSEONG NIEUN
431
+ - pattern: "ᆯᄃ"
432
+ result: "lt" # HANGUL JONGSEONG RIEUL + CHOSEONG TIEUT
433
+ - pattern: "ᆯᄅ"
434
+ result: "ll" # HANGUL JONGSEONG RIEUL + CHOSEONG RIEUL
435
+ - pattern: "ᆯᄆ"
436
+ result: "lm" # HANGUL JONGSEONG RIEUL + CHOSEONG MIEUM
437
+ - pattern: "ᆯᄇ"
438
+ result: "lb" # HANGUL JONGSEONG RIEUL + CHOSEONG PIEUP
439
+ - pattern: "ᆯᄉ"
440
+ result: "ls" # HANGUL JONGSEONG RIEUL + CHOSEONG SIOS
441
+ - pattern: "ᆯᄋ"
442
+ result: "r" # HANGUL JONGSEONG RIEUL + CHOSEONG IEUNG
443
+ - pattern: "ᆯᄌ"
444
+ result: "lj" # HANGUL JONGSEONG RIEUL + CHOSEONG CIEUC
445
+ - pattern: "ᆯᄎ"
446
+ result: "lch" # HANGUL JONGSEONG RIEUL + CHOSEONG CHIEUCH
447
+ - pattern: "ᆯᄏ"
448
+ result: "lkh" # HANGUL JONGSEONG RIEUL + CHOSEONG KHIEUKH
449
+ - pattern: "ᆯᄐ"
450
+ result: "lth" # HANGUL JONGSEONG RIEUL + CHOSEONG THIEUTH
451
+ - pattern: "ᆯᄑ"
452
+ result: "lph" # HANGUL JONGSEONG RIEUL + CHOSEONG PHIEUPH
453
+ - pattern: "ᆯᄒ"
454
+ result: "lh" # HANGUL JONGSEONG RIEUL + CHOSEONG HIEUH
455
+ - pattern: "ᆯᄁ"
456
+ result: "lkk" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGKIYEOK
457
+ - pattern: "ᆯᄄ"
458
+ result: "ltt" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGTIEUT
459
+ - pattern: "ᆯᄈ"
460
+ result: "lpp" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGPIEUP
461
+ - pattern: "ᆯᄊ"
462
+ result: "lss" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGSIOS
463
+ - pattern: "ᆯᄍ"
464
+ result: "ljj" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGCIEUC
465
+ - pattern: "ᆷᄀ"
466
+ result: "mg" # HANGUL JONGSEONG MIEUM + CHOSEONG KIYEOK
467
+ - pattern: "ᆷᄂ"
468
+ result: "mn" # HANGUL JONGSEONG MIEUM + CHOSEONG NIEUN
469
+ - pattern: "ᆷᄃ"
470
+ result: "md" # HANGUL JONGSEONG MIEUM + CHOSEONG TIEUT
471
+ - pattern: "ᆷᄅ"
472
+ result: "mr" # HANGUL JONGSEONG MIEUM + CHOSEONG RIEUL # Note 3.1
473
+ - pattern: "ᆷᄆ"
474
+ result: "mm" # HANGUL JONGSEONG MIEUM + CHOSEONG MIEUM
475
+ - pattern: "ᆷᄇ"
476
+ result: "mb" # HANGUL JONGSEONG MIEUM + CHOSEONG PIEUP
477
+ - pattern: "ᆷᄉ"
478
+ result: "ms" # HANGUL JONGSEONG MIEUM + CHOSEONG SIOS
479
+ - pattern: "ᆷᄋ"
480
+ result: "m" # HANGUL JONGSEONG MIEUM + CHOSEONG IEUNG
481
+ - pattern: "ᆷᄌ"
482
+ result: "mj" # HANGUL JONGSEONG MIEUM + CHOSEONG CIEUC
483
+ - pattern: "ᆷᄎ"
484
+ result: "mch" # HANGUL JONGSEONG MIEUM + CHOSEONG CHIEUCH
485
+ - pattern: "ᆷᄏ"
486
+ result: "mkh" # HANGUL JONGSEONG MIEUM + CHOSEONG KHIEUKH
487
+ - pattern: "ᆷᄐ"
488
+ result: "mth" # HANGUL JONGSEONG MIEUM + CHOSEONG THIEUTH
489
+ - pattern: "ᆷᄑ"
490
+ result: "mph" # HANGUL JONGSEONG MIEUM + CHOSEONG PHIEUPH
491
+ - pattern: "ᆷᄒ"
492
+ result: "mh" # HANGUL JONGSEONG MIEUM + CHOSEONG HIEUH
493
+ - pattern: "ᆷᄁ"
494
+ result: "mkk" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGKIYEOK
495
+ - pattern: "ᆷᄄ"
496
+ result: "mtt" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGTIEUT
497
+ - pattern: "ᆷᄈ"
498
+ result: "mpp" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGPIEUP
499
+ - pattern: "ᆷᄊ"
500
+ result: "mss" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGSIOS
501
+ - pattern: "ᆷᄍ"
502
+ result: "mjj" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGCIEUC
503
+ - pattern: "ᆸᄀ"
504
+ result: "pk" # HANGUL JONGSEONG PIEUP + CHOSEONG KIYEOK
505
+ - pattern: "ᆸᄂ"
506
+ result: "mn" # HANGUL JONGSEONG PIEUP + CHOSEONG NIEUN
507
+ - pattern: "ᆸᄃ"
508
+ result: "pt" # HANGUL JONGSEONG PIEUP + CHOSEONG TIEUT
509
+ - pattern: "ᆸᄅ"
510
+ result: "mr" # HANGUL JONGSEONG PIEUP + CHOSEONG RIEUL
511
+ - pattern: "ᆸᄆ"
512
+ result: "mm" # HANGUL JONGSEONG PIEUP + CHOSEONG MIEUM
513
+ - pattern: "ᆸᄇ"
514
+ result: "pp" # HANGUL JONGSEONG PIEUP + CHOSEONG PIEUP
515
+ - pattern: "ᆸᄉ"
516
+ result: "ps" # HANGUL JONGSEONG PIEUP + CHOSEONG SIOS
517
+ - pattern: "ᆸᄋ"
518
+ result: "b" # HANGUL JONGSEONG PIEUP + CHOSEONG IEUNG
519
+ - pattern: "ᆸᄌ"
520
+ result: "pj" # HANGUL JONGSEONG PIEUP + CHOSEONG CIEUC
521
+ - pattern: "ᆸᄎ"
522
+ result: "pch" # HANGUL JONGSEONG PIEUP + CHOSEONG CHIEUCH
523
+ - pattern: "ᆸᄏ"
524
+ result: "pkh" # HANGUL JONGSEONG PIEUP + CHOSEONG KHIEUKH
525
+ - pattern: "ᆸᄐ"
526
+ result: "pth" # HANGUL JONGSEONG PIEUP + CHOSEONG THIEUTH
527
+ - pattern: "ᆸᄑ"
528
+ result: "pph" # HANGUL JONGSEONG PIEUP + CHOSEONG PHIEUPH
529
+ - pattern: "ᆸᄒ"
530
+ result: "ph" # HANGUL JONGSEONG PIEUP + CHOSEONG HIEUH
531
+ - pattern: "ᆸᄁ"
532
+ result: "pkk" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGKIYEOK
533
+ - pattern: "ᆸᄄ"
534
+ result: "ptt" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGTIEUT
535
+ - pattern: "ᆸᄈ"
536
+ result: "ppp" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGPIEUP
537
+ - pattern: "ᆸᄊ"
538
+ result: "pss" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGSIOS
539
+ - pattern: "ᆸᄍ"
540
+ result: "pjj" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGCIEUC
541
+ - pattern: "ᆺᄀ"
542
+ result: "tk" # HANGUL JONGSEONG SIOS + CHOSEONG KIYEOK
543
+ - pattern: "ᆺᄂ"
544
+ result: "nn" # HANGUL JONGSEONG SIOS + CHOSEONG NIEUN
545
+ - pattern: "ᆺᄃ"
546
+ result: "tt" # HANGUL JONGSEONG SIOS + CHOSEONG TIEUT
547
+ - pattern: "ᆺᄅ"
548
+ result: "nr" # HANGUL JONGSEONG SIOS + CHOSEONG RIEUL # Note 3.1
549
+ - pattern: "ᆺᄆ"
550
+ result: "nm" # HANGUL JONGSEONG SIOS + CHOSEONG MIEUM
551
+ - pattern: "ᆺᄇ"
552
+ result: "tp" # HANGUL JONGSEONG SIOS + CHOSEONG PIEUP
553
+ - pattern: "ᆺᄉ"
554
+ result: "ts" # HANGUL JONGSEONG SIOS + CHOSEONG SIOS
555
+ - pattern: "ᆺᄋ"
556
+ result: "d" # HANGUL JONGSEONG SIOS + CHOSEONG IEUNG
557
+ - pattern: "ᆺᄌ"
558
+ result: "tj" # HANGUL JONGSEONG SIOS + CHOSEONG CIEUC
559
+ - pattern: "ᆺᄎ"
560
+ result: "tch" # HANGUL JONGSEONG SIOS + CHOSEONG CHIEUCH
561
+ - pattern: "ᆺᄏ"
562
+ result: "tkh" # HANGUL JONGSEONG SIOS + CHOSEONG KHIEUKH
563
+ - pattern: "ᆺᄐ"
564
+ result: "tth" # HANGUL JONGSEONG SIOS + CHOSEONG THIEUTH
565
+ - pattern: "ᆺᄑ"
566
+ result: "tph" # HANGUL JONGSEONG SIOS + CHOSEONG PHIEUPH
567
+ - pattern: "ᆺᄒ"
568
+ result: "th" # HANGUL JONGSEONG SIOS + CHOSEONG HIEUH
569
+ - pattern: "ᆺᄁ"
570
+ result: "tkk" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGKIYEOK
571
+ - pattern: "ᆺᄄ"
572
+ result: "ttt" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGTIEUT
573
+ - pattern: "ᆺᄈ"
574
+ result: "tpp" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGPIEUP
575
+ - pattern: "ᆺᄊ"
576
+ result: "tss" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGSIOS
577
+ - pattern: "ᆺᄍ"
578
+ result: "tjj" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGCIEUC
579
+ - pattern: "ᆼᄀ"
580
+ result: "ngg" # HANGUL JONGSEONG IEUNG + CHOSEONG KIYEOK
581
+ - pattern: "ᆼᄂ"
582
+ result: "ngn" # HANGUL JONGSEONG IEUNG + CHOSEONG NIEUN
583
+ - pattern: "ᆼᄃ"
584
+ result: "ngd" # HANGUL JONGSEONG IEUNG + CHOSEONG TIEUT
585
+ - pattern: "ᆼᄅ"
586
+ result: "ngn" # HANGUL JONGSEONG IEUNG + CHOSEONG RIEUL
587
+ - pattern: "ᆼᄆ"
588
+ result: "ngm" # HANGUL JONGSEONG IEUNG + CHOSEONG MIEUM
589
+ - pattern: "ᆼᄇ"
590
+ result: "ngb" # HANGUL JONGSEONG IEUNG + CHOSEONG PIEUP
591
+ - pattern: "ᆼᄉ"
592
+ result: "ngs" # HANGUL JONGSEONG IEUNG + CHOSEONG SIOS
593
+ - pattern: "ᆼᄋ"
594
+ result: "ng" # HANGUL JONGSEONG IEUNG + CHOSEONG IEUNG
595
+ - pattern: "ᆼᄌ"
596
+ result: "ngj" # HANGUL JONGSEONG IEUNG + CHOSEONG CIEUC
597
+ - pattern: "ᆼᄎ"
598
+ result: "ngch" # HANGUL JONGSEONG IEUNG + CHOSEONG CHIEUCH
599
+ - pattern: "ᆼᄏ"
600
+ result: "ngkh" # HANGUL JONGSEONG IEUNG + CHOSEONG KHIEUKH
601
+ - pattern: "ᆼᄐ"
602
+ result: "ngth" # HANGUL JONGSEONG IEUNG + CHOSEONG THIEUTH
603
+ - pattern: "ᆼᄑ"
604
+ result: "ngph" # HANGUL JONGSEONG IEUNG + CHOSEONG PHIEUPH
605
+ - pattern: "ᆼᄒ"
606
+ result: "ngh" # HANGUL JONGSEONG IEUNG + CHOSEONG HIEUH
607
+ - pattern: "ᆼᄁ"
608
+ result: "ngkk" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGKIYEOK
609
+ - pattern: "ᆼᄄ"
610
+ result: "ngtt" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGTIEUT
611
+ - pattern: "ᆼᄈ"
612
+ result: "ngpp" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGPIEUP
613
+ - pattern: "ᆼᄊ"
614
+ result: "ngss" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGSIOS
615
+ - pattern: "ᆼᄍ"
616
+ result: "ngjj" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGCIEUC
617
+ - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄀ"
618
+ result: "g" # VOWEL + CHOSEONG KIYEOK # c.f. Note 3.3
619
+ - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄂ"
620
+ result: "n" # VOWEL + CHOSEONG NIEUN # c.f. Note 3.3
621
+ - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄃ"
622
+ result: "d" # VOWEL + CHOSEONG TIEUT # c.f. Note 3.3
623
+ - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄅ"
624
+ result: "r" # VOWEL + CHOSEONG RIEUL
625
+ - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄆ"
626
+ result: "m" # VOWEL + CHOSEONG MIEUM # c.f. Note 3.3
627
+ - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄇ"
628
+ result: "b" # VOWEL + CHOSEONG PIEUP # c.f. Note 3.3
629
+ - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄉ"
630
+ result: "s" # VOWEL + CHOSEONG SIOS
631
+ - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄋ"
632
+ result: "" # VOWEL + CHOSEONG IEUNG
633
+ - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄌ"
634
+ result: "j" # VOWEL + CHOSEONG CIEUC
635
+ - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄎ"
636
+ result: "ch" # VOWEL + CHOSEONG CHIEUCH
637
+ - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄏ"
638
+ result: "kh" # VOWEL + CHOSEONG KHIEUKH
639
+ - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄐ"
640
+ result: "th" # VOWEL + CHOSEONG THIEUTH
641
+ - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄑ"
642
+ result: "ph" # VOWEL + CHOSEONG PHIEUPH
643
+ - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄒ"
644
+ result: "h" # VOWEL + CHOSEONG HIEUH
645
+ - pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄁ"
646
+ result: "kk" # VOWEL + CHOSEONG SSANGKIYEOK
647
+ - pattern: "(?<=[A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄄ"
648
+ result: "tt" # VOWEL + CHOSEONG SSANGTIEUT
649
+ - pattern: "(?<=[A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄈ"
650
+ result: "pp" # VOWEL + CHOSEONG SSANGPIEUP
651
+ - pattern: "(?<=[A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄊ"
652
+ result: "ss" # VOWEL + CHOSEONG SSANGSIOS
653
+ - pattern: "(?<=[A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄍ"
654
+ result: "jj" # VOWEL + CHOSEONG SSANGCIEUC
655
+ - pattern: "ᆰᄀ"
656
+ result: "lg" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG KIYEOK
657
+ - pattern: "ᆰᄂ"
658
+ result: "ngn" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG NIEUN
659
+ - pattern: "ᆰᄃ"
660
+ result: "kt" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG TIEUT
661
+ - pattern: "ᆰᄅ"
662
+ result: "ngn" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG RIEUL
663
+ - pattern: "ᆰᄆ"
664
+ result: "ngm" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG MIEUM
665
+ - pattern: "ᆰᄇ"
666
+ result: "kp" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG PIEUP
667
+ - pattern: "ᆰᄉ"
668
+ result: "ks" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SIOS
669
+ - pattern: "ᆰᄋ"
670
+ result: "lg" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG IEUNG
671
+ - pattern: "ᆰᄌ"
672
+ result: "kj" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG CIEUC
673
+ - pattern: "ᆰᄎ"
674
+ result: "kch" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG CHIEUCH
675
+ - pattern: "ᆰᄏ"
676
+ result: "lkh" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG KHIEUKH
677
+ - pattern: "ᆰᄐ"
678
+ result: "kth" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG THIEUTH
679
+ - pattern: "ᆰᄑ"
680
+ result: "kph" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG PHIEUPH
681
+ - pattern: "ᆰᄒ"
682
+ result: "lkh" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG HIEUH
683
+ - pattern: "ᆰᄁ"
684
+ result: "lkk" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGKIYEOK
685
+ - pattern: "ᆰᄄ"
686
+ result: "ktt" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGTIEUT
687
+ - pattern: "ᆰᄈ"
688
+ result: "kpp" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGPIEUP
689
+ - pattern: "ᆰᄊ"
690
+ result: "kss" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGSIOS
691
+ - pattern: "ᆰᄍ"
692
+ result: "kjj" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGCIEUC
693
+ - pattern: "ᆱᄀ"
694
+ result: "mg" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG KIYEOK
695
+ - pattern: "ᆱᄂ"
696
+ result: "mn" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG NIEUN
697
+ - pattern: "ᆱᄃ"
698
+ result: "md" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG TIEUT
699
+ - pattern: "ᆱᄅ"
700
+ result: "mr" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG RIEUL
701
+ - pattern: "ᆱᄆ"
702
+ result: "lm" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG MIEUM
703
+ - pattern: "ᆱᄇ"
704
+ result: "mb" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG PIEUP
705
+ - pattern: "ᆱᄉ"
706
+ result: "ms" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SIOS
707
+ - pattern: "ᆱᄋ"
708
+ result: "lm" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG IEUNG
709
+ - pattern: "ᆱᄌ"
710
+ result: "mj" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG CIEUC
711
+ - pattern: "ᆱᄎ"
712
+ result: "mch" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG CHIEUCH
713
+ - pattern: "ᆱᄏ"
714
+ result: "mkh" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG KHIEUKH
715
+ - pattern: "ᆱᄐ"
716
+ result: "mth" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG THIEUTH
717
+ - pattern: "ᆱᄑ"
718
+ result: "mph" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG PHIEUPH
719
+ - pattern: "ᆱᄒ"
720
+ result: "mh" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG HIEUH
721
+ - pattern: "ᆱᄁ"
722
+ result: "mkk" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGKIYEOK
723
+ - pattern: "ᆱᄄ"
724
+ result: "mtt" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGTIEUT
725
+ - pattern: "ᆱᄈ"
726
+ result: "mpp" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGPIEUP
727
+ - pattern: "ᆱᄊ"
728
+ result: "mss" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGSIOS
729
+ - pattern: "ᆱᄍ"
730
+ result: "mjj" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGCIEUC
731
+ - pattern: "ᆲᄀ"
732
+ result: "pk" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG KIYEOK
733
+ - pattern: "ᆲᄂ"
734
+ result: "mn" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG NIEUN
735
+ - pattern: "ᆲᄃ"
736
+ result: "pt" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG TIEUT
737
+ - pattern: "ᆲᄅ"
738
+ result: "mr" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG RIEUL
739
+ - pattern: "ᆲᄆ"
740
+ result: "mm" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG MIEUM
741
+ - pattern: "ᆲᄇ"
742
+ result: "lb" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG PIEUP
743
+ - pattern: "ᆲᄉ"
744
+ result: "ps" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SIOS
745
+ - pattern: "ᆲᄋ"
746
+ result: "lb" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG IEUNG
747
+ - pattern: "ᆲᄌ"
748
+ result: "pj" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG CIEUC
749
+ - pattern: "ᆲᄎ"
750
+ result: "pch" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG CHIEUCH
751
+ - pattern: "ᆲᄏ"
752
+ result: "pkh" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG KHIEUKH
753
+ - pattern: "ᆲᄐ"
754
+ result: "pth" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG THIEUTH
755
+ - pattern: "ᆲᄑ"
756
+ result: "lph" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG PHIEUPH
757
+ - pattern: "ᆲᄒ"
758
+ result: "lph" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG HIEUH
759
+ - pattern: "ᆲᄁ"
760
+ result: "pkk" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGKIYEOK
761
+ - pattern: "ᆲᄄ"
762
+ result: "ptt" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGTIEUT
763
+ - pattern: "ᆲᄈ"
764
+ result: "lpp" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGPIEUP
765
+ - pattern: "ᆲᄊ"
766
+ result: "pss" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGSIOS
767
+ - pattern: "ᆲᄍ"
768
+ result: "pjj" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGCIEUC
769
+ - pattern: "(?<= )ᄀ"
770
+ result: "k" # HANGUL CHOSEONG KIYEOK
771
+ - pattern: "(?<= )ᄂ"
772
+ result: "n" # HANGUL CHOSEONG NIEUN
773
+ - pattern: "(?<= )ᄃ"
774
+ result: "t" # HANGUL CHOSEONG TIEUT
775
+
776
+ # DPRK does not follow the R-onset rule
777
+ # - pattern: "(?<= )ᄅ(?=[ᅣᅤᅧᅨᅭᅲ])"
778
+ # result: "" # HANGUL CHOSEONG RIEUL # R-onset rule
779
+ - pattern: "(?<= )ᄅ"
780
+ # result: "n" # HANGUL CHOSEONG RIEUL
781
+ result: "r"
782
+
783
+ - pattern: "(?<= )ᄆ"
784
+ result: "m" # HANGUL CHOSEONG MIEUM
785
+ - pattern: "(?<= )ᄇ"
786
+ result: "p" # HANGUL CHOSEONG PIEUP
787
+ - pattern: "(?<= )ᄉ"
788
+ result: "s" # HANGUL CHOSEONG SIOS
789
+ - pattern: "(?<= )ᄋ"
790
+ result: "" # HANGUL CHOSEONG IEUNG
791
+ - pattern: "(?<= )ᄌ"
792
+ result: "j" # HANGUL CHOSEONG CIEUC
793
+ - pattern: "(?<= )ᄎ"
794
+ result: "ch" # HANGUL CHOSEONG CHIEUCH
795
+ - pattern: "(?<= )ᄏ"
796
+ result: "kh" # HANGUL CHOSEONG KHIEUKH
797
+ - pattern: "(?<= )ᄐ"
798
+ result: "th" # HANGUL CHOSEONG THIEUTH
799
+ - pattern: "(?<= )ᄑ"
800
+ result: "ph" # HANGUL CHOSEONG PHIEUPH
801
+ - pattern: "(?<= )ᄒ"
802
+ result: "h" # HANGUL CHOSEONG HIEUH
803
+ - pattern: "(?<= )ᄁ"
804
+ result: "kk" # HANGUL CHOSEONG SSANGKIYEOK
805
+ - pattern: "(?<= )ᄭ"
806
+ result: "kk" # HANGUL CHOSEONG SIOS-KIYEOK
807
+ - pattern: "(?<= )ᄄ"
808
+ result: "tt" # HANGUL CHOSEONG SSANGTIEUT
809
+ - pattern: "(?<= )ᄯ"
810
+ result: "tt" # HANGUL CHOSEONG SIOS-TIEUT
811
+ - pattern: "(?<= )ᄈ"
812
+ result: "pp" # HANGUL CHOSEONG SSANGPIEUP
813
+ - pattern: "(?<= )ᄲ"
814
+ result: "pp" # HANGUL CHOSEONG SIOS-PIEUP
815
+ - pattern: "(?<= )ᄊ"
816
+ result: "ss" # HANGUL CHOSEONG SSANGSIOS
817
+ - pattern: "(?<= )ᄍ"
818
+ result: "jj" # HANGUL CHOSEONG SSANGCIEUC
819
+ - pattern: "(?<= )ᄶ"
820
+ result: "jj" # HANGUL CHOSEONG SIOS-CIEUC
821
+ - pattern: "ᅡ"
822
+ result: "a" # HANGUL JUNGSEONG A
823
+ - pattern: "ᅣ"
824
+ result: "ya" # HANGUL JUNGSEONG YA
825
+ - pattern: "ᅥ"
826
+ result: "ŏ" # HANGUL JUNGSEONG EO
827
+ - pattern: "ᅧ"
828
+ result: "yŏ" # HANGUL JUNGSEONG YEO
829
+ - pattern: "ᅩ"
830
+ result: "o" # HANGUL JUNGSEONG O
831
+ - pattern: "ᅭ"
832
+ result: "yo" # HANGUL JUNGSEONG YO
833
+ - pattern: "ᅮ"
834
+ result: "u" # HANGUL JUNGSEONG U
835
+ - pattern: "ᅲ"
836
+ result: "yu" # HANGUL JUNGSEONG YU
837
+ - pattern: "ᅳ"
838
+ result: "ü" # HANGUL JUNGSEONG EU
839
+ - pattern: "ᅵ"
840
+ result: "i" # HANGUL JUNGSEONG I
841
+ - pattern: "ᅢ"
842
+ result: "ae" # HANGUL JUNGSEONG AE
843
+ - pattern: "ᅤ"
844
+ result: "yae" # HANGUL JUNGSEONG YAE
845
+ - pattern: "ᅦ"
846
+ result: "e" # HANGUL JUNGSEONG E
847
+ - pattern: "ᅨ"
848
+ result: "ye" # HANGUL JUNGSEONG YE
849
+ - pattern: "ᅬ"
850
+ result: "oe" # HANGUL JUNGSEONG OE
851
+ - pattern: "ᅱ"
852
+ result: "wi" # HANGUL JUNGSEONG WI
853
+ - pattern: "ᅴ"
854
+ result: "üi" # HANGUL JUNGSEONG YI
855
+ - pattern: "ᅪ"
856
+ result: "wa" # HANGUL JUNGSEONG WA
857
+ - pattern: "ᅯ"
858
+ result: "wo" # HANGUL JUNGSEONG WEO
859
+ - pattern: "ᅫ"
860
+ result: "wae" # HANGUL JUNGSEONG WAE
861
+ - pattern: "ᅰ"
862
+ result: "we" # HANGUL JUNGSEONG WE
863
+ - pattern: "ᆨ(?=[ A-Za-z0-9-])"
864
+ result: "k" # HANGUL JONGSEONG KIYEOK
865
+ - pattern: "ᆫ(?=[ A-Za-z0-9-])"
866
+ result: "n" # HANGUL JONGSEONG NIEUN
867
+ - pattern: "ᆮ(?=[ A-Za-z0-9-])"
868
+ result: "t" # HANGUL JONGSEONG TIEUT
869
+ - pattern: "ᆯ(?=[ A-Za-z0-9-])"
870
+ result: "l" # HANGUL JONGSEONG RIEUL
871
+ - pattern: "ᆷ(?=[ A-Za-z0-9-])"
872
+ result: "m" # HANGUL JONGSEONG MIEUM
873
+ - pattern: "ᆸ(?=[ A-Za-z0-9-])"
874
+ result: "p" # HANGUL JONGSEONG PIEUP
875
+ - pattern: "ᆺ(?=[ A-Za-z0-9-])"
876
+ result: "t" # HANGUL JONGSEONG SIOS
877
+ - pattern: "ᆼ(?=[ A-Za-z0-9-])"
878
+ result: "ng" # HANGUL JONGSEONG IEUNG
879
+ - pattern: "ᆽ(?=[ A-Za-z0-9-])"
880
+ result: "t" # HANGUL JONGSEONG CIEUC
881
+ - pattern: "ᆾ(?=[ A-Za-z0-9-])"
882
+ result: "t" # HANGUL JONGSEONG CHIEUCH
883
+ - pattern: "ᆿ(?=[ A-Za-z0-9-])"
884
+ result: "k" # HANGUL JONGSEONG KHIEUKH
885
+ - pattern: "ᇀ(?=[ A-Za-z0-9-])"
886
+ result: "t" # HANGUL JONGSEONG THIEUTH
887
+ - pattern: "ᇁ(?=[ A-Za-z0-9-])"
888
+ result: "p" # HANGUL JONGSEONG PHIEUPH
889
+ - pattern: "ᆰ(?=[ A-Za-z0-9-])"
890
+ result: "k" # HANGUL JONGSEONG RIEUL-KIYEOK
891
+ - pattern: "ᆲ(?=[ A-Za-z0-9-])"
892
+ result: "p" # HANGUL JONGSEONG RIEUL-PIEUP
893
+
894
+ # Remove space added
895
+ - pattern: "^ "
896
+ result: ""
897
+ - pattern: " $"
898
+ result: ""
899
+
900
+ characters:
901
+ # This is based on Jamo