interscript 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +246 -14
- data/bin/interscript +38 -17
- data/bin/setup +8 -0
- data/lib/g2pwrapper.py +34 -0
- data/lib/interscript.rb +140 -16
- data/lib/interscript/command.rb +27 -0
- data/lib/interscript/mapping.rb +125 -0
- data/lib/interscript/version.rb +1 -1
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
- data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
- data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
- data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
- data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
- data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
- data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
- data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
- data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
- data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
- data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
- data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
- data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
- data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
- data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
- data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
- data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
- data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
- data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
- data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
- data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
- data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
- data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
- data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
- data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
- data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
- data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
- data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
- data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
- data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
- data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
- data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
- data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
- data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
- data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
- data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
- data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
- data/spec/interscript/mapping_spec.rb +42 -0
- data/spec/interscript_spec.rb +20 -5
- data/spec/spec_helper.rb +3 -1
- metadata +149 -24
- data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
- data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
- data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
- data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
- data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
- data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
- data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
@@ -0,0 +1,41 @@
|
|
1
|
+
---
|
2
|
+
authority_id: iso
|
3
|
+
id: 1997
|
4
|
+
language: ell
|
5
|
+
source_script: Grek
|
6
|
+
destination_script: Latn
|
7
|
+
name: ISO 843:1997
|
8
|
+
url:
|
9
|
+
creation_date: 1997
|
10
|
+
description: |
|
11
|
+
ISO Transcription table for Greek
|
12
|
+
|
13
|
+
note:
|
14
|
+
- Transliteration of Greek into Latin: Type 2, Clause 3 Table 2
|
15
|
+
- Introduced casing to digamma, yot, and lunate sigma. (Casing was late introduction to character sets for those characters)
|
16
|
+
|
17
|
+
tests:
|
18
|
+
|
19
|
+
- source: |
|
20
|
+
Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί, και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι· όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ. Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος. Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»· όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ». Και εις το εξής να μάθομεν γνώση, αν θέλομεν να φκιάσομεν χωριόν, να ζήσομεν όλοι μαζί.
|
21
|
+
|
22
|
+
Γιάννης Μακρυγιάννης.
|
23
|
+
|
24
|
+
expected: |
|
25
|
+
Éna práma mónon me parakínise ki eména na grápso óti toútin tin patrída tin échomen óloi mazí, kai sofoí ki amatheís kai ploúsioi kai ftochoí kai politikoí kai stratiotikoí kai oi pléon mikróteroi ánthropoi; ósoi agonistíkamen, analógos o katheís, échomen na zísomen edó. To loipón doulépsamen óloi mazí, na tin fylámen ki óloi mazí kai na min légei oúte o dynatós «egó» oúte o adýnatos. Xérete póte na légei o katheís «egó»? Ótan agonisteí mónos tou kai fkiásei í chalásei, na légei «egó»; ótan ómos agonízontai polloí kai fkiánoun, tóte na léne «emeís». Eímaste eis to «emeís» ki óchi eis to «egó». Kai eis to exís na máthomen gnósi, an thélomen na fkiásomen chorión, na zísomen óloi mazí.
|
26
|
+
|
27
|
+
Giánnis Makrygiánnis.
|
28
|
+
|
29
|
+
map:
|
30
|
+
character_separator: ""
|
31
|
+
word_separator: " "
|
32
|
+
inherit: "elot-ell-Grek-Latn-743-1982-ts"
|
33
|
+
|
34
|
+
characters:
|
35
|
+
"\u03DC": "W" # Ϝ
|
36
|
+
"\u03DD": "w" # ϝ
|
37
|
+
"\u03F2": "s" # ϲ
|
38
|
+
"\u03F9": "S" # Ϲ
|
39
|
+
"\u03F3": "j"
|
40
|
+
"\u037F": "j"
|
41
|
+
|
@@ -0,0 +1,62 @@
|
|
1
|
+
---
|
2
|
+
authority_id: iso
|
3
|
+
id: 3602-1989
|
4
|
+
language: jpn
|
5
|
+
source_script: Hrkt
|
6
|
+
destination_script: Latn
|
7
|
+
name: ISO 3602 Romanization of Japanese (Kana Script)
|
8
|
+
url:
|
9
|
+
creation_date:
|
10
|
+
adoption_date:
|
11
|
+
description:
|
12
|
+
|
13
|
+
notes:
|
14
|
+
|
15
|
+
tests:
|
16
|
+
- source: かんおう
|
17
|
+
expected: kan’ô
|
18
|
+
- source: かのう
|
19
|
+
expected: kanô
|
20
|
+
- source: きんゆう
|
21
|
+
expected: kin’yû
|
22
|
+
- source: とうきょう
|
23
|
+
expected: tôkyô
|
24
|
+
- source: がっ•こう
|
25
|
+
expected: gakkô
|
26
|
+
- source: かごっま
|
27
|
+
expected: kagomma
|
28
|
+
- source: ぽっぽっや
|
29
|
+
expected: poppoyya
|
30
|
+
- source: てっら
|
31
|
+
expected: terra
|
32
|
+
- source: にゃっほー
|
33
|
+
expected: nyahhô
|
34
|
+
- source: ゴッホ
|
35
|
+
expected: gohho
|
36
|
+
- source: おも•う
|
37
|
+
expected: omou
|
38
|
+
- source: こうし
|
39
|
+
expected: kôsi
|
40
|
+
- source: こう•し #格子
|
41
|
+
expected: kôsi
|
42
|
+
- source: こ•うし #子牛
|
43
|
+
expected: kousi
|
44
|
+
- source: ぎゃあ
|
45
|
+
expected: gyâ
|
46
|
+
|
47
|
+
map:
|
48
|
+
inherit: mext-jpn-Hrkt-Latn-1954
|
49
|
+
|
50
|
+
rules:
|
51
|
+
# Remove morpheme boundary marker after sokuon っ/ッ
|
52
|
+
- pattern: "([っッ])•"
|
53
|
+
result: "\\1"
|
54
|
+
|
55
|
+
postrules:
|
56
|
+
# Remove morpheme boundary marker
|
57
|
+
- pattern: "•"
|
58
|
+
result: ""
|
59
|
+
|
60
|
+
# Use ’ instead of '
|
61
|
+
- pattern: "'"
|
62
|
+
result: "’"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
---
|
2
2
|
authority_id: iso
|
3
|
-
id:
|
3
|
+
id: 9-1995
|
4
4
|
language: rus
|
5
5
|
source_script: Cyrl
|
6
6
|
destination_script: Latn
|
@@ -14,8 +14,7 @@ description: |
|
|
14
14
|
Cyrillic alphabetic order, the 118 single or diacritic-carrying
|
15
15
|
characters that appear in one or another of the considered alphabets.
|
16
16
|
tests:
|
17
|
-
|
18
|
-
expected:
|
17
|
+
|
19
18
|
|
20
19
|
map:
|
21
20
|
characters:
|
@@ -0,0 +1,109 @@
|
|
1
|
+
---
|
2
|
+
authority_id: iso
|
3
|
+
id: 11940-1998
|
4
|
+
language: tha
|
5
|
+
source_script: Thai
|
6
|
+
destination_script: Latn
|
7
|
+
name: ISO 11940:1998 Information and documentation -- Transliteration of Thai
|
8
|
+
url: https://www.iso.org/standard/20574.html
|
9
|
+
creation_date: 1998
|
10
|
+
adoption_date:
|
11
|
+
description:
|
12
|
+
|
13
|
+
notes:
|
14
|
+
|
15
|
+
tests:
|
16
|
+
- source: 'ภาษาไทย'
|
17
|
+
expected: 'p̣hās̛̄āịthy'
|
18
|
+
- source: 'เชียงใหม่'
|
19
|
+
expected: 'echīyngıh̄m̀'
|
20
|
+
|
21
|
+
map:
|
22
|
+
|
23
|
+
characters:
|
24
|
+
'\u0e01': 'k' # ก THAI CHARACTER KO KAI
|
25
|
+
'\u0e02': 'k̄h' # ข THAI CHARACTER KHO KHAI
|
26
|
+
'\u0e03': 'ḳ̄h' # ฃ THAI CHARACTER KHO KHUAT
|
27
|
+
'\u0e04': 'kh' # ค THAI CHARACTER KHO KHWAI
|
28
|
+
'\u0e05': 'k̛h' # ฅ THAI CHARACTER KHO KHON
|
29
|
+
'\u0e06': 'ḳh' # ฆ THAI CHARACTER KHO RAKHANG
|
30
|
+
'\u0e07': 'ng' # ง THAI CHARACTER NGO NGU
|
31
|
+
'\u0e08': 'c' # จ THAI CHARACTER CHO CHAN
|
32
|
+
'\u0e09': 'c̄h' # ฉ THAI CHARACTER CHO CHING
|
33
|
+
'\u0e0a': 'ch' # ช THAI CHARACTER CHO CHANG
|
34
|
+
'\u0e0b': 's' # ซ THAI CHARACTER SO SO
|
35
|
+
'\u0e0c': 'c̣h' # ฌ THAI CHARACTER CHO CHOE
|
36
|
+
'\u0e0d': 'ỵ' # ญ THAI CHARACTER YO YING
|
37
|
+
'\u0e0e': 'ḍ' # ฎ THAI CHARACTER DO CHADA
|
38
|
+
'\u0e0f': 'ṭ' # ฏ THAI CHARACTER TO PATAK
|
39
|
+
'\u0e10': 'ṭ̄h' # ฐ THAI CHARACTER THO THAN
|
40
|
+
'\u0e11': 'ṯh' # ฑ THAI CHARACTER THO NANGMONTHO
|
41
|
+
'\u0e12': 't̛h' # ฒ THAI CHARACTER THO PHUTHAO
|
42
|
+
'\u0e13': 'ṇ' # ณ THAI CHARACTER NO NEN
|
43
|
+
'\u0e14': 'd' # ด THAI CHARACTER DO DEK
|
44
|
+
'\u0e15': 't' # ต THAI CHARACTER TO TAO
|
45
|
+
'\u0e16': 't̄h' # ถ THAI CHARACTER THO THUNG
|
46
|
+
'\u0e17': 'th' # ท THAI CHARACTER THO THAHAN
|
47
|
+
'\u0e18': 'ṭh' # ธ THAI CHARACTER THO THONG
|
48
|
+
'\u0e19': 'n' # น THAI CHARACTER NO NU
|
49
|
+
'\u0e1a': 'b' # บ THAI CHARACTER BO BAIMAI
|
50
|
+
'\u0e1b': 'p' # ป THAI CHARACTER PO PLA
|
51
|
+
'\u0e1c': 'p̄h' # ผ THAI CHARACTER PHO PHUNG
|
52
|
+
'\u0e1d': 'f̄' # ฝ THAI CHARACTER FO FA
|
53
|
+
'\u0e1e': 'ph' # พ THAI CHARACTER PHO PHAN
|
54
|
+
'\u0e1f': 'f' # ฟ THAI CHARACTER FO FAN
|
55
|
+
'\u0e20': 'p̣h' # ภ THAI CHARACTER PHO SAMPHAO
|
56
|
+
'\u0e21': 'm' # ม THAI CHARACTER MO MA
|
57
|
+
'\u0e22': 'y' # ย THAI CHARACTER YO YAK
|
58
|
+
'\u0e23': 'r' # ร THAI CHARACTER RO RUA
|
59
|
+
'\u0e24': 'v' # ฤ THAI CHARACTER RU
|
60
|
+
'\u0e25': 'l' # ล THAI CHARACTER LO LING
|
61
|
+
'\u0e26': 'ł' # ฦ THAI CHARACTER LU
|
62
|
+
'\u0e27': 'w' # ว THAI CHARACTER WO WAEN
|
63
|
+
'\u0e28': 'ṣ̄' # ศ THAI CHARACTER SO SALA
|
64
|
+
'\u0e29': 's̛̄' # ษ THAI CHARACTER SO RUSI
|
65
|
+
'\u0e2a': 's̄' # ส THAI CHARACTER SO SUA
|
66
|
+
'\u0e2b': 'h̄' # ห THAI CHARACTER HO HIP
|
67
|
+
'\u0e2c': 'ḷ' # ฬ THAI CHARACTER LO CHULA
|
68
|
+
'\u0e2d': 'x' # อ THAI CHARACTER O ANG
|
69
|
+
'\u0e2e': 'ḥ' # ฮ THAI CHARACTER HO NOKHUK
|
70
|
+
'\u0e2f': 'ǂ' # ฯ THAI CHARACTER PAIYANNOI
|
71
|
+
'\u0e30': 'a' # ะ THAI CHARACTER SARA A
|
72
|
+
'\u0e31': 'ạ' # ั THAI CHARACTER MAI HAN-AKAT
|
73
|
+
'\u0e32': 'ā' # า THAI CHARACTER SARA AA
|
74
|
+
'\u0e33': 'å' # ำ THAI CHARACTER SARA AM
|
75
|
+
'\u0e34': 'i' # ิ THAI CHARACTER SARA I
|
76
|
+
'\u0e35': 'ī' # ี THAI CHARACTER SARA II
|
77
|
+
'\u0e36': 'ụ' # ึ THAI CHARACTER SARA UE
|
78
|
+
'\u0e37': 'ụ̄' # ื THAI CHARACTER SARA UEE
|
79
|
+
'\u0e38': 'u' # ุ THAI CHARACTER SARA U
|
80
|
+
'\u0e39': 'ū' # ู THAI CHARACTER SARA UU
|
81
|
+
'\u0e3a': '–̥' # ฺ THAI CHARACTER PHINTHU
|
82
|
+
'\u0e40': 'e' # เ THAI CHARACTER SARA E
|
83
|
+
'\u0e41': 'æ' # แ THAI CHARACTER SARA AE
|
84
|
+
'\u0e42': 'o' # โ THAI CHARACTER SARA O
|
85
|
+
'\u0e43': 'ı' # ใ THAI CHARACTER SARA AI MAIMUAN
|
86
|
+
'\u0e44': 'ị' # ไ THAI CHARACTER SARA AI MAIMALAI
|
87
|
+
'\u0e45': 'ɨ' # ๅ THAI CHARACTER LAKKHANGYAO
|
88
|
+
'\u0e46': '«' # ๆ THAI CHARACTER MAIYAMOK
|
89
|
+
'\u0e47': '̆' # ็ THAI CHARACTER MAITAIKHU
|
90
|
+
'\u0e48': '̀' # ่ THAI CHARACTER MAI EK
|
91
|
+
'\u0e49': '̂' # ้ THAI CHARACTER MAI THO
|
92
|
+
'\u0e4a': '́' # ๊ THAI CHARACTER MAI TRI
|
93
|
+
'\u0e4b': '̌' # ๋ THAI CHARACTER MAI CHATTAWA
|
94
|
+
'\u0e4c': '̒' # ์ THAI CHARACTER THANTHAKHAT
|
95
|
+
'\u0e4d': '̊' # ํ THAI CHARACTER NIKHAHIT
|
96
|
+
'\u0e4e': '~' # ๎ THAI CHARACTER YAMAKKAN
|
97
|
+
'\u0e4f': '§' # ๏ THAI CHARACTER FONGMAN
|
98
|
+
'\u0e50': '0' # ๐ THAI DIGIT ZERO
|
99
|
+
'\u0e51': '1' # ๑ THAI DIGIT ONE
|
100
|
+
'\u0e52': '2' # ๒ THAI DIGIT TWO
|
101
|
+
'\u0e53': '3' # ๓ THAI DIGIT THREE
|
102
|
+
'\u0e54': '4' # ๔ THAI DIGIT FOUR
|
103
|
+
'\u0e55': '5' # ๕ THAI DIGIT FIVE
|
104
|
+
'\u0e56': '6' # ๖ THAI DIGIT SIX
|
105
|
+
'\u0e57': '7' # ๗ THAI DIGIT SEVEN
|
106
|
+
'\u0e58': '8' # ๘ THAI DIGIT EIGHT
|
107
|
+
'\u0e59': '9' # ๙ THAI DIGIT NINE
|
108
|
+
'\u0e5a': 'ǁ' # ๚ THAI CHARACTER ANGKHANKHU
|
109
|
+
'\u0e5b': '»' # ๛ THAI CHARACTER KHOMUT
|
@@ -0,0 +1,901 @@
|
|
1
|
+
---
|
2
|
+
authority_id: kp
|
3
|
+
id: 2002
|
4
|
+
language: kor
|
5
|
+
source_script: Hang
|
6
|
+
destination_script: Latn
|
7
|
+
name: Korean Democratic People's Republic of Korea Korean System (2002)
|
8
|
+
url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/8th-uncsgn-docs/inf/8th_UNCSGN_econf.94_INF.72.pdf
|
9
|
+
creation_date:
|
10
|
+
adoption_date:
|
11
|
+
description:
|
12
|
+
|
13
|
+
notes:
|
14
|
+
|
15
|
+
- Here is a list of features that are listed in the guideline but
|
16
|
+
not implemented in this map.
|
17
|
+
|
18
|
+
- Note 3.2
|
19
|
+
The combination n+r is romanized as -ll- only when it is "considered
|
20
|
+
to be longstanding". In this implementation, all n+r will be romanized as
|
21
|
+
-ll- for the sake of simplicity.
|
22
|
+
|
23
|
+
- Note 3.3
|
24
|
+
Sai-siot (Connective ㅅ) is not written out in DPRK Korean, but it is
|
25
|
+
supposed to be romanized. Sai-siot is not predictable.
|
26
|
+
This has not been implemented.
|
27
|
+
|
28
|
+
- Note 4.1
|
29
|
+
Hyphen "may be inserted in case of a possible confusion in pronunciation".
|
30
|
+
Except for the n-g combination, this has not been implemented.
|
31
|
+
|
32
|
+
- Note 4.4
|
33
|
+
Geographical names "may be transliterated or translated". In this map,
|
34
|
+
all names will be transliterated, not translated. Numerals will not be
|
35
|
+
transliterated.
|
36
|
+
|
37
|
+
- Note 4.5
|
38
|
+
Spacing rule for personal names has not been implemented.
|
39
|
+
|
40
|
+
- Note 4.7
|
41
|
+
Optional omission of diacritics and optional simplification of
|
42
|
+
KK, TT, PP, SS, JJ to single letter have not been implemented.
|
43
|
+
|
44
|
+
tests:
|
45
|
+
# Note1.5
|
46
|
+
- source: "우리산"
|
47
|
+
expected: "Urisan"
|
48
|
+
|
49
|
+
# Note2.1
|
50
|
+
- source: "교구동"
|
51
|
+
expected: "Kyogu-dong"
|
52
|
+
- source: "초도"
|
53
|
+
expected: "Chodo"
|
54
|
+
- source: "고비리"
|
55
|
+
expected: "Kobi-ri"
|
56
|
+
- source: "강동"
|
57
|
+
expected: "Kangdong"
|
58
|
+
- source: "금교"
|
59
|
+
expected: "Kümgyo"
|
60
|
+
- source: "칠보산"
|
61
|
+
expected: "Chilbosan"
|
62
|
+
|
63
|
+
# Note2.2
|
64
|
+
- source: "곡산"
|
65
|
+
expected: "Koksan"
|
66
|
+
- source: "갑산"
|
67
|
+
expected: "Kapsan"
|
68
|
+
- source: "앞산"
|
69
|
+
expected: "Apsan"
|
70
|
+
- source: "삿갓봉"
|
71
|
+
expected: "Satkatbong"
|
72
|
+
|
73
|
+
# Note2.3
|
74
|
+
- source: "울산"
|
75
|
+
expected: "Ulsan"
|
76
|
+
# - source: "은률"
|
77
|
+
# expected: "Ünryul" # This is an exception to note 3.1
|
78
|
+
|
79
|
+
# Note2.4
|
80
|
+
- source: "닭섬"
|
81
|
+
expected: "Taksŏm"
|
82
|
+
- source: "물곬"
|
83
|
+
expected: "Mulkol"
|
84
|
+
- source: "붉은바위"
|
85
|
+
expected: "Pulgünbawi"
|
86
|
+
- source: "앉은바위"
|
87
|
+
expected: "Anjünbawi"
|
88
|
+
|
89
|
+
# Note3.1
|
90
|
+
- source: "백마산"
|
91
|
+
expected: "Paengmasan"
|
92
|
+
- source: "꽃마을"
|
93
|
+
expected: "Kkonmaül"
|
94
|
+
- source: "압록강"
|
95
|
+
expected: "Amrokgang"
|
96
|
+
|
97
|
+
# Note3.2
|
98
|
+
- source: "천리마"
|
99
|
+
expected: "Chŏllima"
|
100
|
+
# - source: "한나산" # Typo in the original document
|
101
|
+
- source: "한라산"
|
102
|
+
expected: "Hallasan"
|
103
|
+
- source: "전라도"
|
104
|
+
expected: "Jŏlla-do"
|
105
|
+
|
106
|
+
# Note3.3
|
107
|
+
|
108
|
+
# - source: "기대산" # ROK: 깃대산
|
109
|
+
# expected: "Kittaesan"
|
110
|
+
# - source: "새별읍" # ROK: 샛별
|
111
|
+
# expected: "Saeppyŏl-üp" # hyphen
|
112
|
+
# - source: "뒤문" # ROK: 뒷문
|
113
|
+
# expected: "Twinmun"
|
114
|
+
|
115
|
+
# Note4.1 - Separator (OPTIONAL)
|
116
|
+
|
117
|
+
- source: "앞-언덕"
|
118
|
+
expected: "Ap-ŏndŏk"
|
119
|
+
- source: "부억-안골"
|
120
|
+
expected: "Puŏk-angol"
|
121
|
+
- source: "판교"
|
122
|
+
expected: "Phan-gyo"
|
123
|
+
# - source: "방어동"
|
124
|
+
# expected: "Pang-ŏ-dong"
|
125
|
+
|
126
|
+
# Note4.2
|
127
|
+
- source: "평안남도 평성시"
|
128
|
+
expected: "Phyŏngannam-do Phyŏngsŏng-si"
|
129
|
+
|
130
|
+
# Note4.3
|
131
|
+
- source: "3.1동"
|
132
|
+
expected: "3.1-dong"
|
133
|
+
|
134
|
+
# Note4.6
|
135
|
+
- source: "평양"
|
136
|
+
expected: "Pyongyang"
|
137
|
+
|
138
|
+
map:
|
139
|
+
character_separator: ""
|
140
|
+
word_separator: " "
|
141
|
+
title_case: True
|
142
|
+
inherit: "nil-kor-Hang-Hang-jamo"
|
143
|
+
|
144
|
+
rules:
|
145
|
+
|
146
|
+
# This system does not require transliteration of numerals
|
147
|
+
# convert numbers to space + Hangul
|
148
|
+
# - pattern: "([^0-9 ])(?=[0-9])"
|
149
|
+
# result: "\\1 "
|
150
|
+
# - pattern: "1"
|
151
|
+
# result: "일"
|
152
|
+
# - pattern: "2"
|
153
|
+
# result: "이"
|
154
|
+
# - pattern: "3"
|
155
|
+
# result: "삼"
|
156
|
+
# - pattern: "4"
|
157
|
+
# result: "사"
|
158
|
+
# - pattern: "5"
|
159
|
+
# result: "오"
|
160
|
+
# - pattern: "6"
|
161
|
+
# result: "육"
|
162
|
+
# - pattern: "7"
|
163
|
+
# result: "칠"
|
164
|
+
# - pattern: "8"
|
165
|
+
# result: "팔"
|
166
|
+
# - pattern: "9"
|
167
|
+
# result: "구"
|
168
|
+
|
169
|
+
# Use voiced onset for geographical features
|
170
|
+
# Note 4.3.1
|
171
|
+
- pattern: "(?<=..)산( |$)"
|
172
|
+
result: "san\\1"
|
173
|
+
- pattern: "(?<=..)거리( |$)"
|
174
|
+
result: "gŏri\\1"
|
175
|
+
- pattern: "(?<=..)고개( |$)"
|
176
|
+
result: "gogae\\1"
|
177
|
+
- pattern: "(?<=..)대( |$)"
|
178
|
+
result: "dae\\1"
|
179
|
+
- pattern: "(?<=..)봉( |$)"
|
180
|
+
result: "bong\\1"
|
181
|
+
- pattern: "(?<=..)교( |$)"
|
182
|
+
result: "gyo\\1"
|
183
|
+
- pattern: "(?<=..)골( |$)"
|
184
|
+
result: "gol\\1"
|
185
|
+
- pattern: "(?<=..)각( |$)"
|
186
|
+
result: "gak\\1"
|
187
|
+
- pattern: "(?<=..)벌( |$)"
|
188
|
+
result: "bŏl\\1"
|
189
|
+
- pattern: "(?<=..)관( |$)"
|
190
|
+
result: "gwan\\1"
|
191
|
+
- pattern: "(?<=..)곶( |$)"
|
192
|
+
result: "got\\1"
|
193
|
+
- pattern: "(?<=..)강( |$)"
|
194
|
+
result: "gang\\1"
|
195
|
+
|
196
|
+
# add hyphen in front of generics
|
197
|
+
# Only add hyphen if the name is three syllables or longer
|
198
|
+
- pattern: "(?<=..)도( |$)"
|
199
|
+
result: "-do\\1"
|
200
|
+
- pattern: "(?<=..)시( |$)"
|
201
|
+
result: "-si\\1"
|
202
|
+
- pattern: "(?<=..)군( |$)"
|
203
|
+
result: "-gun\\1"
|
204
|
+
- pattern: "(?<=..)면( |$)"
|
205
|
+
result: "-myŏn\\1"
|
206
|
+
- pattern: "(?<=..)리( |$)"
|
207
|
+
result: "-ri\\1"
|
208
|
+
- pattern: "(?<=..)동( |$)"
|
209
|
+
result: "-dong\\1"
|
210
|
+
- pattern: "(?<=..)구( |$)"
|
211
|
+
result: "-gu\\1"
|
212
|
+
- pattern: "(?<=..)구역( |$)"
|
213
|
+
result: "-guyŏk\\1"
|
214
|
+
|
215
|
+
# The name Pyongyang will be an exception
|
216
|
+
# Not Phyŏngyang
|
217
|
+
|
218
|
+
- pattern: "평양"
|
219
|
+
result: "Pyongyang"
|
220
|
+
|
221
|
+
postrules:
|
222
|
+
|
223
|
+
# Add space to the two ends of the string for easier word boundary handling
|
224
|
+
- pattern: "^"
|
225
|
+
result: " "
|
226
|
+
- pattern: "$"
|
227
|
+
result: " "
|
228
|
+
|
229
|
+
# HANGUL JONGSEONG SSANGKIYEOK
|
230
|
+
- pattern: "ᆩᄋ"
|
231
|
+
result: "ᆨᄁ"
|
232
|
+
- pattern: "ᆩ"
|
233
|
+
result: "ᆨ"
|
234
|
+
|
235
|
+
# HANGUL JONGSEONG KIYEOK-SIOS
|
236
|
+
- pattern: "ᆪᄋ"
|
237
|
+
result: "ᆨᄉ"
|
238
|
+
- pattern: "ᆪ"
|
239
|
+
result: "ᆨ"
|
240
|
+
|
241
|
+
# HANGUL JONGSEONG NIEUN-CIEUC
|
242
|
+
- pattern: "ᆬᄋ"
|
243
|
+
result: "ᆫᄌ"
|
244
|
+
- pattern: "ᆬ"
|
245
|
+
result: "ᆫ"
|
246
|
+
|
247
|
+
# HANGUL JONGSEONG NIEUN-HIEUH
|
248
|
+
- pattern: "ᆭᄀ"
|
249
|
+
result: "ᆫᄏ"
|
250
|
+
- pattern: "ᆭᄃ"
|
251
|
+
result: "ᆫᄐ"
|
252
|
+
- pattern: "ᆭᄇ"
|
253
|
+
result: "ᆫᄑ"
|
254
|
+
- pattern: "ᆭᄌ"
|
255
|
+
result: "ᆫᄎ"
|
256
|
+
- pattern: "ᆭ"
|
257
|
+
result: "ᆫ"
|
258
|
+
|
259
|
+
# HANGUL JONGSEONG TIEUT
|
260
|
+
- pattern: "ᆮ(?=[ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄌᄍᄎᄏᄐᄑᄒ])"
|
261
|
+
result: "ᆺ"
|
262
|
+
|
263
|
+
# HANGUL JONGSEONG RIEUL-SIOS
|
264
|
+
- pattern: "ᆳᄋ"
|
265
|
+
result: "ᆯᄉ"
|
266
|
+
- pattern: "ᆳ"
|
267
|
+
result: "ᆯ"
|
268
|
+
|
269
|
+
# HANGUL JONGSEONG RIEUL-THIEUTH
|
270
|
+
- pattern: "ᆴᄋ"
|
271
|
+
result: "ᆯᄐ"
|
272
|
+
- pattern: "ᆴ"
|
273
|
+
result: "ᆯ"
|
274
|
+
|
275
|
+
# HANGUL JONGSEONG RIEUL-PHIEUPH
|
276
|
+
- pattern: "ᆵᄋ"
|
277
|
+
result: "ᆯᄑ"
|
278
|
+
- pattern: "ᆵ(?=[ᄃᄄᄐ])"
|
279
|
+
result: "ᆯ"
|
280
|
+
- pattern: "ᆵ"
|
281
|
+
result: "ᄇ"
|
282
|
+
|
283
|
+
# HANGUL JONGSEONG RIEUL-HIEUH
|
284
|
+
- pattern: "ᆶᄀ"
|
285
|
+
result: "ᆯᄏ"
|
286
|
+
- pattern: "ᆶᄃ"
|
287
|
+
result: "ᆯᄐ"
|
288
|
+
- pattern: "ᆶᄇ"
|
289
|
+
result: "ᆯᄑ"
|
290
|
+
- pattern: "ᆶᄌ"
|
291
|
+
result: "ᆯᄎ"
|
292
|
+
- pattern: "ᆶ"
|
293
|
+
result: "ᆯ"
|
294
|
+
|
295
|
+
# HANGUL JONGSEONG PIEUP-SIOS
|
296
|
+
- pattern: "ᆹᄋ"
|
297
|
+
result: "ᄇᄉ"
|
298
|
+
- pattern: "ᆹ"
|
299
|
+
result: "ᄇ"
|
300
|
+
|
301
|
+
# HANGUL JONGSEONG SSANG-SIOS
|
302
|
+
- pattern: "ᆻᄋ"
|
303
|
+
result: "ᆺᄊ"
|
304
|
+
- pattern: "ᆻ"
|
305
|
+
result: "ᆺ"
|
306
|
+
|
307
|
+
# HANGUL JONGSEONG CIEUC
|
308
|
+
- pattern: "ᆽᄋ"
|
309
|
+
result: "ᆺᄌ"
|
310
|
+
- pattern: "ᆽ"
|
311
|
+
result: "ᆺ"
|
312
|
+
|
313
|
+
# HANGUL JONGSEONG CHIEUCH
|
314
|
+
- pattern: "ᆾᄋ"
|
315
|
+
result: "ᆺᄎ"
|
316
|
+
- pattern: "ᆾ"
|
317
|
+
result: "ᆺ"
|
318
|
+
|
319
|
+
# HANGUL JONGSEONG KHIEUKH
|
320
|
+
- pattern: "ᆿᄋ"
|
321
|
+
result: "ᆨᄏ"
|
322
|
+
- pattern: "ᆿ"
|
323
|
+
result: "ᆨ"
|
324
|
+
|
325
|
+
# HANGUL JONGSEONG THIEUTH
|
326
|
+
- pattern: "ᇀᄋ"
|
327
|
+
result: "ᆺᄐ"
|
328
|
+
- pattern: "ᇀ"
|
329
|
+
result: "ᆺ"
|
330
|
+
|
331
|
+
# HANGUL JONGSEONG PHIEUPH
|
332
|
+
- pattern: "ᇁᄋ"
|
333
|
+
result: "ᆸᄑ"
|
334
|
+
- pattern: "ᇁ"
|
335
|
+
result: "ᆸ"
|
336
|
+
|
337
|
+
# HANGUL JONGSEONG HIEUH
|
338
|
+
- pattern: "ᇂᄀ"
|
339
|
+
result: "ᄏ"
|
340
|
+
- pattern: "ᇂᄃ"
|
341
|
+
result: "ᄐ"
|
342
|
+
- pattern: "ᇂᄇ"
|
343
|
+
result: "ᄑ"
|
344
|
+
- pattern: "ᇂᄌ"
|
345
|
+
result: "ᄎ"
|
346
|
+
- pattern: "ᇂ"
|
347
|
+
result: ""
|
348
|
+
|
349
|
+
# From Unicode Chart
|
350
|
+
# https://github.com/unicode-org/cldr/blob/master/common/transforms/Korean-Latin-BGN.xml
|
351
|
+
- pattern: "ᆨᄀ"
|
352
|
+
result: "kk" # HANGUL JONGSEONG KIYEOK + CHOSEONG KIYEOK
|
353
|
+
- pattern: "ᆨᄂ"
|
354
|
+
result: "ngn" # HANGUL JONGSEONG KIYEOK + CHOSEONG NIEUN
|
355
|
+
- pattern: "ᆨᄃ"
|
356
|
+
result: "kt" # HANGUL JONGSEONG KIYEOK + CHOSEONG TIEUT
|
357
|
+
- pattern: "ᆨᄅ"
|
358
|
+
result: "ngn" # HANGUL JONGSEONG KIYEOK + CHOSEONG RIEUL
|
359
|
+
- pattern: "ᆨᄆ"
|
360
|
+
result: "ngm" # HANGUL JONGSEONG KIYEOK + CHOSEONG MIEUM
|
361
|
+
- pattern: "ᆨᄇ"
|
362
|
+
result: "kp" # HANGUL JONGSEONG KIYEOK + CHOSEONG PIEUP
|
363
|
+
- pattern: "ᆨᄉ"
|
364
|
+
result: "ks" # HANGUL JONGSEONG KIYEOK + CHOSEONG SIOS
|
365
|
+
- pattern: "ᆨᄋ"
|
366
|
+
result: "g" # HANGUL JONGSEONG KIYEOK + CHOSEONG IEUNG
|
367
|
+
- pattern: "ᆨᄌ"
|
368
|
+
result: "kj" # HANGUL JONGSEONG KIYEOK + CHOSEONG CIEUC
|
369
|
+
- pattern: "ᆨᄎ"
|
370
|
+
result: "kch" # HANGUL JONGSEONG KIYEOK + CHOSEONG CHIEUCH
|
371
|
+
- pattern: "ᆨᄏ"
|
372
|
+
result: "kkh" # HANGUL JONGSEONG KIYEOK + CHOSEONG KHIEUKH # NOTE: the dash is always skipped
|
373
|
+
- pattern: "ᆨᄐ"
|
374
|
+
result: "kth" # HANGUL JONGSEONG KIYEOK + CHOSEONG THIEUTH
|
375
|
+
- pattern: "ᆨᄑ"
|
376
|
+
result: "kp" # HANGUL JONGSEONG KIYEOK + CHOSEONG PHIEUPH
|
377
|
+
- pattern: "ᆨᄒ"
|
378
|
+
result: "kh" # HANGUL JONGSEONG KIYEOK + CHOSEONG HIEUH
|
379
|
+
- pattern: "ᆨᄁ"
|
380
|
+
result: "kkk" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGKIYEOK
|
381
|
+
- pattern: "ᆨᄄ"
|
382
|
+
result: "ktt" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGTIEUT
|
383
|
+
- pattern: "ᆨᄈ"
|
384
|
+
result: "kpp" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGPIEUP
|
385
|
+
- pattern: "ᆨᄊ"
|
386
|
+
result: "kss" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGSIOS
|
387
|
+
- pattern: "ᆨᄍ"
|
388
|
+
result: "kjj" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGCIEUC
|
389
|
+
- pattern: "ᆫᄀ"
|
390
|
+
result: "n-g" # HANGUL JONGSEONG NIEUN + CHOSEONG KIYEOK
|
391
|
+
- pattern: "ᆫᄂ"
|
392
|
+
result: "nn" # HANGUL JONGSEONG NIEUN + CHOSEONG NIEUN
|
393
|
+
- pattern: "ᆫᄃ"
|
394
|
+
result: "nd" # HANGUL JONGSEONG NIEUN + CHOSEONG TIEUT
|
395
|
+
- pattern: "ᆫᄅ"
|
396
|
+
result: "ll" # HANGUL JONGSEONG NIEUN + CHOSEONG RIEUL
|
397
|
+
- pattern: "ᆫᄆ"
|
398
|
+
result: "nm" # HANGUL JONGSEONG NIEUN + CHOSEONG MIEUM
|
399
|
+
- pattern: "ᆫᄇ"
|
400
|
+
result: "nb" # HANGUL JONGSEONG NIEUN + CHOSEONG PIEUP
|
401
|
+
- pattern: "ᆫᄉ"
|
402
|
+
result: "ns" # HANGUL JONGSEONG NIEUN + CHOSEONG SIOS
|
403
|
+
- pattern: "ᆫᄋ"
|
404
|
+
result: "n" # HANGUL JONGSEONG NIEUN + CHOSEONG IEUNG
|
405
|
+
- pattern: "ᆫᄌ"
|
406
|
+
result: "nj" # HANGUL JONGSEONG NIEUN + CHOSEONG CIEUC
|
407
|
+
- pattern: "ᆫᄎ"
|
408
|
+
result: "nch" # HANGUL JONGSEONG NIEUN + CHOSEONG CHIEUCH
|
409
|
+
- pattern: "ᆫᄏ"
|
410
|
+
result: "nkh" # HANGUL JONGSEONG NIEUN + CHOSEONG KHIEUKH
|
411
|
+
- pattern: "ᆫᄐ"
|
412
|
+
result: "nth" # HANGUL JONGSEONG NIEUN + CHOSEONG THIEUTH
|
413
|
+
- pattern: "ᆫᄑ"
|
414
|
+
result: "nph" # HANGUL JONGSEONG NIEUN + CHOSEONG PHIEUPH
|
415
|
+
- pattern: "ᆫᄒ"
|
416
|
+
result: "nh" # HANGUL JONGSEONG NIEUN + CHOSEONG HIEUH
|
417
|
+
- pattern: "ᆫᄁ"
|
418
|
+
result: "nkk" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGKIYEOK
|
419
|
+
- pattern: "ᆫᄄ"
|
420
|
+
result: "ntt" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGTIEUT
|
421
|
+
- pattern: "ᆫᄈ"
|
422
|
+
result: "npp" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGPIEUP
|
423
|
+
- pattern: "ᆫᄊ"
|
424
|
+
result: "nss" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGSIOS
|
425
|
+
- pattern: "ᆫᄍ"
|
426
|
+
result: "njj" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGCIEUC
|
427
|
+
- pattern: "ᆯᄀ"
|
428
|
+
result: "lk" # HANGUL JONGSEONG RIEUL + CHOSEONG KIYEOK
|
429
|
+
- pattern: "ᆯᄂ"
|
430
|
+
result: "ll" # HANGUL JONGSEONG RIEUL + CHOSEONG NIEUN
|
431
|
+
- pattern: "ᆯᄃ"
|
432
|
+
result: "lt" # HANGUL JONGSEONG RIEUL + CHOSEONG TIEUT
|
433
|
+
- pattern: "ᆯᄅ"
|
434
|
+
result: "ll" # HANGUL JONGSEONG RIEUL + CHOSEONG RIEUL
|
435
|
+
- pattern: "ᆯᄆ"
|
436
|
+
result: "lm" # HANGUL JONGSEONG RIEUL + CHOSEONG MIEUM
|
437
|
+
- pattern: "ᆯᄇ"
|
438
|
+
result: "lb" # HANGUL JONGSEONG RIEUL + CHOSEONG PIEUP
|
439
|
+
- pattern: "ᆯᄉ"
|
440
|
+
result: "ls" # HANGUL JONGSEONG RIEUL + CHOSEONG SIOS
|
441
|
+
- pattern: "ᆯᄋ"
|
442
|
+
result: "r" # HANGUL JONGSEONG RIEUL + CHOSEONG IEUNG
|
443
|
+
- pattern: "ᆯᄌ"
|
444
|
+
result: "lj" # HANGUL JONGSEONG RIEUL + CHOSEONG CIEUC
|
445
|
+
- pattern: "ᆯᄎ"
|
446
|
+
result: "lch" # HANGUL JONGSEONG RIEUL + CHOSEONG CHIEUCH
|
447
|
+
- pattern: "ᆯᄏ"
|
448
|
+
result: "lkh" # HANGUL JONGSEONG RIEUL + CHOSEONG KHIEUKH
|
449
|
+
- pattern: "ᆯᄐ"
|
450
|
+
result: "lth" # HANGUL JONGSEONG RIEUL + CHOSEONG THIEUTH
|
451
|
+
- pattern: "ᆯᄑ"
|
452
|
+
result: "lph" # HANGUL JONGSEONG RIEUL + CHOSEONG PHIEUPH
|
453
|
+
- pattern: "ᆯᄒ"
|
454
|
+
result: "lh" # HANGUL JONGSEONG RIEUL + CHOSEONG HIEUH
|
455
|
+
- pattern: "ᆯᄁ"
|
456
|
+
result: "lkk" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGKIYEOK
|
457
|
+
- pattern: "ᆯᄄ"
|
458
|
+
result: "ltt" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGTIEUT
|
459
|
+
- pattern: "ᆯᄈ"
|
460
|
+
result: "lpp" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGPIEUP
|
461
|
+
- pattern: "ᆯᄊ"
|
462
|
+
result: "lss" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGSIOS
|
463
|
+
- pattern: "ᆯᄍ"
|
464
|
+
result: "ljj" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGCIEUC
|
465
|
+
- pattern: "ᆷᄀ"
|
466
|
+
result: "mg" # HANGUL JONGSEONG MIEUM + CHOSEONG KIYEOK
|
467
|
+
- pattern: "ᆷᄂ"
|
468
|
+
result: "mn" # HANGUL JONGSEONG MIEUM + CHOSEONG NIEUN
|
469
|
+
- pattern: "ᆷᄃ"
|
470
|
+
result: "md" # HANGUL JONGSEONG MIEUM + CHOSEONG TIEUT
|
471
|
+
- pattern: "ᆷᄅ"
|
472
|
+
result: "mr" # HANGUL JONGSEONG MIEUM + CHOSEONG RIEUL # Note 3.1
|
473
|
+
- pattern: "ᆷᄆ"
|
474
|
+
result: "mm" # HANGUL JONGSEONG MIEUM + CHOSEONG MIEUM
|
475
|
+
- pattern: "ᆷᄇ"
|
476
|
+
result: "mb" # HANGUL JONGSEONG MIEUM + CHOSEONG PIEUP
|
477
|
+
- pattern: "ᆷᄉ"
|
478
|
+
result: "ms" # HANGUL JONGSEONG MIEUM + CHOSEONG SIOS
|
479
|
+
- pattern: "ᆷᄋ"
|
480
|
+
result: "m" # HANGUL JONGSEONG MIEUM + CHOSEONG IEUNG
|
481
|
+
- pattern: "ᆷᄌ"
|
482
|
+
result: "mj" # HANGUL JONGSEONG MIEUM + CHOSEONG CIEUC
|
483
|
+
- pattern: "ᆷᄎ"
|
484
|
+
result: "mch" # HANGUL JONGSEONG MIEUM + CHOSEONG CHIEUCH
|
485
|
+
- pattern: "ᆷᄏ"
|
486
|
+
result: "mkh" # HANGUL JONGSEONG MIEUM + CHOSEONG KHIEUKH
|
487
|
+
- pattern: "ᆷᄐ"
|
488
|
+
result: "mth" # HANGUL JONGSEONG MIEUM + CHOSEONG THIEUTH
|
489
|
+
- pattern: "ᆷᄑ"
|
490
|
+
result: "mph" # HANGUL JONGSEONG MIEUM + CHOSEONG PHIEUPH
|
491
|
+
- pattern: "ᆷᄒ"
|
492
|
+
result: "mh" # HANGUL JONGSEONG MIEUM + CHOSEONG HIEUH
|
493
|
+
- pattern: "ᆷᄁ"
|
494
|
+
result: "mkk" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGKIYEOK
|
495
|
+
- pattern: "ᆷᄄ"
|
496
|
+
result: "mtt" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGTIEUT
|
497
|
+
- pattern: "ᆷᄈ"
|
498
|
+
result: "mpp" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGPIEUP
|
499
|
+
- pattern: "ᆷᄊ"
|
500
|
+
result: "mss" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGSIOS
|
501
|
+
- pattern: "ᆷᄍ"
|
502
|
+
result: "mjj" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGCIEUC
|
503
|
+
- pattern: "ᆸᄀ"
|
504
|
+
result: "pk" # HANGUL JONGSEONG PIEUP + CHOSEONG KIYEOK
|
505
|
+
- pattern: "ᆸᄂ"
|
506
|
+
result: "mn" # HANGUL JONGSEONG PIEUP + CHOSEONG NIEUN
|
507
|
+
- pattern: "ᆸᄃ"
|
508
|
+
result: "pt" # HANGUL JONGSEONG PIEUP + CHOSEONG TIEUT
|
509
|
+
- pattern: "ᆸᄅ"
|
510
|
+
result: "mr" # HANGUL JONGSEONG PIEUP + CHOSEONG RIEUL
|
511
|
+
- pattern: "ᆸᄆ"
|
512
|
+
result: "mm" # HANGUL JONGSEONG PIEUP + CHOSEONG MIEUM
|
513
|
+
- pattern: "ᆸᄇ"
|
514
|
+
result: "pp" # HANGUL JONGSEONG PIEUP + CHOSEONG PIEUP
|
515
|
+
- pattern: "ᆸᄉ"
|
516
|
+
result: "ps" # HANGUL JONGSEONG PIEUP + CHOSEONG SIOS
|
517
|
+
- pattern: "ᆸᄋ"
|
518
|
+
result: "b" # HANGUL JONGSEONG PIEUP + CHOSEONG IEUNG
|
519
|
+
- pattern: "ᆸᄌ"
|
520
|
+
result: "pj" # HANGUL JONGSEONG PIEUP + CHOSEONG CIEUC
|
521
|
+
- pattern: "ᆸᄎ"
|
522
|
+
result: "pch" # HANGUL JONGSEONG PIEUP + CHOSEONG CHIEUCH
|
523
|
+
- pattern: "ᆸᄏ"
|
524
|
+
result: "pkh" # HANGUL JONGSEONG PIEUP + CHOSEONG KHIEUKH
|
525
|
+
- pattern: "ᆸᄐ"
|
526
|
+
result: "pth" # HANGUL JONGSEONG PIEUP + CHOSEONG THIEUTH
|
527
|
+
- pattern: "ᆸᄑ"
|
528
|
+
result: "pph" # HANGUL JONGSEONG PIEUP + CHOSEONG PHIEUPH
|
529
|
+
- pattern: "ᆸᄒ"
|
530
|
+
result: "ph" # HANGUL JONGSEONG PIEUP + CHOSEONG HIEUH
|
531
|
+
- pattern: "ᆸᄁ"
|
532
|
+
result: "pkk" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGKIYEOK
|
533
|
+
- pattern: "ᆸᄄ"
|
534
|
+
result: "ptt" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGTIEUT
|
535
|
+
- pattern: "ᆸᄈ"
|
536
|
+
result: "ppp" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGPIEUP
|
537
|
+
- pattern: "ᆸᄊ"
|
538
|
+
result: "pss" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGSIOS
|
539
|
+
- pattern: "ᆸᄍ"
|
540
|
+
result: "pjj" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGCIEUC
|
541
|
+
- pattern: "ᆺᄀ"
|
542
|
+
result: "tk" # HANGUL JONGSEONG SIOS + CHOSEONG KIYEOK
|
543
|
+
- pattern: "ᆺᄂ"
|
544
|
+
result: "nn" # HANGUL JONGSEONG SIOS + CHOSEONG NIEUN
|
545
|
+
- pattern: "ᆺᄃ"
|
546
|
+
result: "tt" # HANGUL JONGSEONG SIOS + CHOSEONG TIEUT
|
547
|
+
- pattern: "ᆺᄅ"
|
548
|
+
result: "nr" # HANGUL JONGSEONG SIOS + CHOSEONG RIEUL # Note 3.1
|
549
|
+
- pattern: "ᆺᄆ"
|
550
|
+
result: "nm" # HANGUL JONGSEONG SIOS + CHOSEONG MIEUM
|
551
|
+
- pattern: "ᆺᄇ"
|
552
|
+
result: "tp" # HANGUL JONGSEONG SIOS + CHOSEONG PIEUP
|
553
|
+
- pattern: "ᆺᄉ"
|
554
|
+
result: "ts" # HANGUL JONGSEONG SIOS + CHOSEONG SIOS
|
555
|
+
- pattern: "ᆺᄋ"
|
556
|
+
result: "d" # HANGUL JONGSEONG SIOS + CHOSEONG IEUNG
|
557
|
+
- pattern: "ᆺᄌ"
|
558
|
+
result: "tj" # HANGUL JONGSEONG SIOS + CHOSEONG CIEUC
|
559
|
+
- pattern: "ᆺᄎ"
|
560
|
+
result: "tch" # HANGUL JONGSEONG SIOS + CHOSEONG CHIEUCH
|
561
|
+
- pattern: "ᆺᄏ"
|
562
|
+
result: "tkh" # HANGUL JONGSEONG SIOS + CHOSEONG KHIEUKH
|
563
|
+
- pattern: "ᆺᄐ"
|
564
|
+
result: "tth" # HANGUL JONGSEONG SIOS + CHOSEONG THIEUTH
|
565
|
+
- pattern: "ᆺᄑ"
|
566
|
+
result: "tph" # HANGUL JONGSEONG SIOS + CHOSEONG PHIEUPH
|
567
|
+
- pattern: "ᆺᄒ"
|
568
|
+
result: "th" # HANGUL JONGSEONG SIOS + CHOSEONG HIEUH
|
569
|
+
- pattern: "ᆺᄁ"
|
570
|
+
result: "tkk" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGKIYEOK
|
571
|
+
- pattern: "ᆺᄄ"
|
572
|
+
result: "ttt" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGTIEUT
|
573
|
+
- pattern: "ᆺᄈ"
|
574
|
+
result: "tpp" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGPIEUP
|
575
|
+
- pattern: "ᆺᄊ"
|
576
|
+
result: "tss" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGSIOS
|
577
|
+
- pattern: "ᆺᄍ"
|
578
|
+
result: "tjj" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGCIEUC
|
579
|
+
- pattern: "ᆼᄀ"
|
580
|
+
result: "ngg" # HANGUL JONGSEONG IEUNG + CHOSEONG KIYEOK
|
581
|
+
- pattern: "ᆼᄂ"
|
582
|
+
result: "ngn" # HANGUL JONGSEONG IEUNG + CHOSEONG NIEUN
|
583
|
+
- pattern: "ᆼᄃ"
|
584
|
+
result: "ngd" # HANGUL JONGSEONG IEUNG + CHOSEONG TIEUT
|
585
|
+
- pattern: "ᆼᄅ"
|
586
|
+
result: "ngn" # HANGUL JONGSEONG IEUNG + CHOSEONG RIEUL
|
587
|
+
- pattern: "ᆼᄆ"
|
588
|
+
result: "ngm" # HANGUL JONGSEONG IEUNG + CHOSEONG MIEUM
|
589
|
+
- pattern: "ᆼᄇ"
|
590
|
+
result: "ngb" # HANGUL JONGSEONG IEUNG + CHOSEONG PIEUP
|
591
|
+
- pattern: "ᆼᄉ"
|
592
|
+
result: "ngs" # HANGUL JONGSEONG IEUNG + CHOSEONG SIOS
|
593
|
+
- pattern: "ᆼᄋ"
|
594
|
+
result: "ng" # HANGUL JONGSEONG IEUNG + CHOSEONG IEUNG
|
595
|
+
- pattern: "ᆼᄌ"
|
596
|
+
result: "ngj" # HANGUL JONGSEONG IEUNG + CHOSEONG CIEUC
|
597
|
+
- pattern: "ᆼᄎ"
|
598
|
+
result: "ngch" # HANGUL JONGSEONG IEUNG + CHOSEONG CHIEUCH
|
599
|
+
- pattern: "ᆼᄏ"
|
600
|
+
result: "ngkh" # HANGUL JONGSEONG IEUNG + CHOSEONG KHIEUKH
|
601
|
+
- pattern: "ᆼᄐ"
|
602
|
+
result: "ngth" # HANGUL JONGSEONG IEUNG + CHOSEONG THIEUTH
|
603
|
+
- pattern: "ᆼᄑ"
|
604
|
+
result: "ngph" # HANGUL JONGSEONG IEUNG + CHOSEONG PHIEUPH
|
605
|
+
- pattern: "ᆼᄒ"
|
606
|
+
result: "ngh" # HANGUL JONGSEONG IEUNG + CHOSEONG HIEUH
|
607
|
+
- pattern: "ᆼᄁ"
|
608
|
+
result: "ngkk" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGKIYEOK
|
609
|
+
- pattern: "ᆼᄄ"
|
610
|
+
result: "ngtt" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGTIEUT
|
611
|
+
- pattern: "ᆼᄈ"
|
612
|
+
result: "ngpp" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGPIEUP
|
613
|
+
- pattern: "ᆼᄊ"
|
614
|
+
result: "ngss" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGSIOS
|
615
|
+
- pattern: "ᆼᄍ"
|
616
|
+
result: "ngjj" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGCIEUC
|
617
|
+
- pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄀ"
|
618
|
+
result: "g" # VOWEL + CHOSEONG KIYEOK # c.f. Note 3.3
|
619
|
+
- pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄂ"
|
620
|
+
result: "n" # VOWEL + CHOSEONG NIEUN # c.f. Note 3.3
|
621
|
+
- pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄃ"
|
622
|
+
result: "d" # VOWEL + CHOSEONG TIEUT # c.f. Note 3.3
|
623
|
+
- pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄅ"
|
624
|
+
result: "r" # VOWEL + CHOSEONG RIEUL
|
625
|
+
- pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄆ"
|
626
|
+
result: "m" # VOWEL + CHOSEONG MIEUM # c.f. Note 3.3
|
627
|
+
- pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄇ"
|
628
|
+
result: "b" # VOWEL + CHOSEONG PIEUP # c.f. Note 3.3
|
629
|
+
- pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄉ"
|
630
|
+
result: "s" # VOWEL + CHOSEONG SIOS
|
631
|
+
- pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄋ"
|
632
|
+
result: "" # VOWEL + CHOSEONG IEUNG
|
633
|
+
- pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄌ"
|
634
|
+
result: "j" # VOWEL + CHOSEONG CIEUC
|
635
|
+
- pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄎ"
|
636
|
+
result: "ch" # VOWEL + CHOSEONG CHIEUCH
|
637
|
+
- pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄏ"
|
638
|
+
result: "kh" # VOWEL + CHOSEONG KHIEUKH
|
639
|
+
- pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄐ"
|
640
|
+
result: "th" # VOWEL + CHOSEONG THIEUTH
|
641
|
+
- pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄑ"
|
642
|
+
result: "ph" # VOWEL + CHOSEONG PHIEUPH
|
643
|
+
- pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄒ"
|
644
|
+
result: "h" # VOWEL + CHOSEONG HIEUH
|
645
|
+
- pattern: "(?<=[-A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄁ"
|
646
|
+
result: "kk" # VOWEL + CHOSEONG SSANGKIYEOK
|
647
|
+
- pattern: "(?<=[A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄄ"
|
648
|
+
result: "tt" # VOWEL + CHOSEONG SSANGTIEUT
|
649
|
+
- pattern: "(?<=[A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄈ"
|
650
|
+
result: "pp" # VOWEL + CHOSEONG SSANGPIEUP
|
651
|
+
- pattern: "(?<=[A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄊ"
|
652
|
+
result: "ss" # VOWEL + CHOSEONG SSANGSIOS
|
653
|
+
- pattern: "(?<=[A-Za-z0-9ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ])ᄍ"
|
654
|
+
result: "jj" # VOWEL + CHOSEONG SSANGCIEUC
|
655
|
+
- pattern: "ᆰᄀ"
|
656
|
+
result: "lg" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG KIYEOK
|
657
|
+
- pattern: "ᆰᄂ"
|
658
|
+
result: "ngn" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG NIEUN
|
659
|
+
- pattern: "ᆰᄃ"
|
660
|
+
result: "kt" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG TIEUT
|
661
|
+
- pattern: "ᆰᄅ"
|
662
|
+
result: "ngn" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG RIEUL
|
663
|
+
- pattern: "ᆰᄆ"
|
664
|
+
result: "ngm" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG MIEUM
|
665
|
+
- pattern: "ᆰᄇ"
|
666
|
+
result: "kp" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG PIEUP
|
667
|
+
- pattern: "ᆰᄉ"
|
668
|
+
result: "ks" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SIOS
|
669
|
+
- pattern: "ᆰᄋ"
|
670
|
+
result: "lg" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG IEUNG
|
671
|
+
- pattern: "ᆰᄌ"
|
672
|
+
result: "kj" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG CIEUC
|
673
|
+
- pattern: "ᆰᄎ"
|
674
|
+
result: "kch" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG CHIEUCH
|
675
|
+
- pattern: "ᆰᄏ"
|
676
|
+
result: "lkh" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG KHIEUKH
|
677
|
+
- pattern: "ᆰᄐ"
|
678
|
+
result: "kth" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG THIEUTH
|
679
|
+
- pattern: "ᆰᄑ"
|
680
|
+
result: "kph" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG PHIEUPH
|
681
|
+
- pattern: "ᆰᄒ"
|
682
|
+
result: "lkh" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG HIEUH
|
683
|
+
- pattern: "ᆰᄁ"
|
684
|
+
result: "lkk" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGKIYEOK
|
685
|
+
- pattern: "ᆰᄄ"
|
686
|
+
result: "ktt" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGTIEUT
|
687
|
+
- pattern: "ᆰᄈ"
|
688
|
+
result: "kpp" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGPIEUP
|
689
|
+
- pattern: "ᆰᄊ"
|
690
|
+
result: "kss" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGSIOS
|
691
|
+
- pattern: "ᆰᄍ"
|
692
|
+
result: "kjj" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGCIEUC
|
693
|
+
- pattern: "ᆱᄀ"
|
694
|
+
result: "mg" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG KIYEOK
|
695
|
+
- pattern: "ᆱᄂ"
|
696
|
+
result: "mn" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG NIEUN
|
697
|
+
- pattern: "ᆱᄃ"
|
698
|
+
result: "md" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG TIEUT
|
699
|
+
- pattern: "ᆱᄅ"
|
700
|
+
result: "mr" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG RIEUL
|
701
|
+
- pattern: "ᆱᄆ"
|
702
|
+
result: "lm" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG MIEUM
|
703
|
+
- pattern: "ᆱᄇ"
|
704
|
+
result: "mb" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG PIEUP
|
705
|
+
- pattern: "ᆱᄉ"
|
706
|
+
result: "ms" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SIOS
|
707
|
+
- pattern: "ᆱᄋ"
|
708
|
+
result: "lm" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG IEUNG
|
709
|
+
- pattern: "ᆱᄌ"
|
710
|
+
result: "mj" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG CIEUC
|
711
|
+
- pattern: "ᆱᄎ"
|
712
|
+
result: "mch" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG CHIEUCH
|
713
|
+
- pattern: "ᆱᄏ"
|
714
|
+
result: "mkh" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG KHIEUKH
|
715
|
+
- pattern: "ᆱᄐ"
|
716
|
+
result: "mth" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG THIEUTH
|
717
|
+
- pattern: "ᆱᄑ"
|
718
|
+
result: "mph" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG PHIEUPH
|
719
|
+
- pattern: "ᆱᄒ"
|
720
|
+
result: "mh" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG HIEUH
|
721
|
+
- pattern: "ᆱᄁ"
|
722
|
+
result: "mkk" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGKIYEOK
|
723
|
+
- pattern: "ᆱᄄ"
|
724
|
+
result: "mtt" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGTIEUT
|
725
|
+
- pattern: "ᆱᄈ"
|
726
|
+
result: "mpp" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGPIEUP
|
727
|
+
- pattern: "ᆱᄊ"
|
728
|
+
result: "mss" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGSIOS
|
729
|
+
- pattern: "ᆱᄍ"
|
730
|
+
result: "mjj" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGCIEUC
|
731
|
+
- pattern: "ᆲᄀ"
|
732
|
+
result: "pk" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG KIYEOK
|
733
|
+
- pattern: "ᆲᄂ"
|
734
|
+
result: "mn" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG NIEUN
|
735
|
+
- pattern: "ᆲᄃ"
|
736
|
+
result: "pt" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG TIEUT
|
737
|
+
- pattern: "ᆲᄅ"
|
738
|
+
result: "mr" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG RIEUL
|
739
|
+
- pattern: "ᆲᄆ"
|
740
|
+
result: "mm" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG MIEUM
|
741
|
+
- pattern: "ᆲᄇ"
|
742
|
+
result: "lb" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG PIEUP
|
743
|
+
- pattern: "ᆲᄉ"
|
744
|
+
result: "ps" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SIOS
|
745
|
+
- pattern: "ᆲᄋ"
|
746
|
+
result: "lb" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG IEUNG
|
747
|
+
- pattern: "ᆲᄌ"
|
748
|
+
result: "pj" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG CIEUC
|
749
|
+
- pattern: "ᆲᄎ"
|
750
|
+
result: "pch" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG CHIEUCH
|
751
|
+
- pattern: "ᆲᄏ"
|
752
|
+
result: "pkh" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG KHIEUKH
|
753
|
+
- pattern: "ᆲᄐ"
|
754
|
+
result: "pth" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG THIEUTH
|
755
|
+
- pattern: "ᆲᄑ"
|
756
|
+
result: "lph" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG PHIEUPH
|
757
|
+
- pattern: "ᆲᄒ"
|
758
|
+
result: "lph" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG HIEUH
|
759
|
+
- pattern: "ᆲᄁ"
|
760
|
+
result: "pkk" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGKIYEOK
|
761
|
+
- pattern: "ᆲᄄ"
|
762
|
+
result: "ptt" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGTIEUT
|
763
|
+
- pattern: "ᆲᄈ"
|
764
|
+
result: "lpp" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGPIEUP
|
765
|
+
- pattern: "ᆲᄊ"
|
766
|
+
result: "pss" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGSIOS
|
767
|
+
- pattern: "ᆲᄍ"
|
768
|
+
result: "pjj" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGCIEUC
|
769
|
+
- pattern: "(?<= )ᄀ"
|
770
|
+
result: "k" # HANGUL CHOSEONG KIYEOK
|
771
|
+
- pattern: "(?<= )ᄂ"
|
772
|
+
result: "n" # HANGUL CHOSEONG NIEUN
|
773
|
+
- pattern: "(?<= )ᄃ"
|
774
|
+
result: "t" # HANGUL CHOSEONG TIEUT
|
775
|
+
|
776
|
+
# DPRK does not follow the R-onset rule
|
777
|
+
# - pattern: "(?<= )ᄅ(?=[ᅣᅤᅧᅨᅭᅲ])"
|
778
|
+
# result: "" # HANGUL CHOSEONG RIEUL # R-onset rule
|
779
|
+
- pattern: "(?<= )ᄅ"
|
780
|
+
# result: "n" # HANGUL CHOSEONG RIEUL
|
781
|
+
result: "r"
|
782
|
+
|
783
|
+
- pattern: "(?<= )ᄆ"
|
784
|
+
result: "m" # HANGUL CHOSEONG MIEUM
|
785
|
+
- pattern: "(?<= )ᄇ"
|
786
|
+
result: "p" # HANGUL CHOSEONG PIEUP
|
787
|
+
- pattern: "(?<= )ᄉ"
|
788
|
+
result: "s" # HANGUL CHOSEONG SIOS
|
789
|
+
- pattern: "(?<= )ᄋ"
|
790
|
+
result: "" # HANGUL CHOSEONG IEUNG
|
791
|
+
- pattern: "(?<= )ᄌ"
|
792
|
+
result: "j" # HANGUL CHOSEONG CIEUC
|
793
|
+
- pattern: "(?<= )ᄎ"
|
794
|
+
result: "ch" # HANGUL CHOSEONG CHIEUCH
|
795
|
+
- pattern: "(?<= )ᄏ"
|
796
|
+
result: "kh" # HANGUL CHOSEONG KHIEUKH
|
797
|
+
- pattern: "(?<= )ᄐ"
|
798
|
+
result: "th" # HANGUL CHOSEONG THIEUTH
|
799
|
+
- pattern: "(?<= )ᄑ"
|
800
|
+
result: "ph" # HANGUL CHOSEONG PHIEUPH
|
801
|
+
- pattern: "(?<= )ᄒ"
|
802
|
+
result: "h" # HANGUL CHOSEONG HIEUH
|
803
|
+
- pattern: "(?<= )ᄁ"
|
804
|
+
result: "kk" # HANGUL CHOSEONG SSANGKIYEOK
|
805
|
+
- pattern: "(?<= )ᄭ"
|
806
|
+
result: "kk" # HANGUL CHOSEONG SIOS-KIYEOK
|
807
|
+
- pattern: "(?<= )ᄄ"
|
808
|
+
result: "tt" # HANGUL CHOSEONG SSANGTIEUT
|
809
|
+
- pattern: "(?<= )ᄯ"
|
810
|
+
result: "tt" # HANGUL CHOSEONG SIOS-TIEUT
|
811
|
+
- pattern: "(?<= )ᄈ"
|
812
|
+
result: "pp" # HANGUL CHOSEONG SSANGPIEUP
|
813
|
+
- pattern: "(?<= )ᄲ"
|
814
|
+
result: "pp" # HANGUL CHOSEONG SIOS-PIEUP
|
815
|
+
- pattern: "(?<= )ᄊ"
|
816
|
+
result: "ss" # HANGUL CHOSEONG SSANGSIOS
|
817
|
+
- pattern: "(?<= )ᄍ"
|
818
|
+
result: "jj" # HANGUL CHOSEONG SSANGCIEUC
|
819
|
+
- pattern: "(?<= )ᄶ"
|
820
|
+
result: "jj" # HANGUL CHOSEONG SIOS-CIEUC
|
821
|
+
- pattern: "ᅡ"
|
822
|
+
result: "a" # HANGUL JUNGSEONG A
|
823
|
+
- pattern: "ᅣ"
|
824
|
+
result: "ya" # HANGUL JUNGSEONG YA
|
825
|
+
- pattern: "ᅥ"
|
826
|
+
result: "ŏ" # HANGUL JUNGSEONG EO
|
827
|
+
- pattern: "ᅧ"
|
828
|
+
result: "yŏ" # HANGUL JUNGSEONG YEO
|
829
|
+
- pattern: "ᅩ"
|
830
|
+
result: "o" # HANGUL JUNGSEONG O
|
831
|
+
- pattern: "ᅭ"
|
832
|
+
result: "yo" # HANGUL JUNGSEONG YO
|
833
|
+
- pattern: "ᅮ"
|
834
|
+
result: "u" # HANGUL JUNGSEONG U
|
835
|
+
- pattern: "ᅲ"
|
836
|
+
result: "yu" # HANGUL JUNGSEONG YU
|
837
|
+
- pattern: "ᅳ"
|
838
|
+
result: "ü" # HANGUL JUNGSEONG EU
|
839
|
+
- pattern: "ᅵ"
|
840
|
+
result: "i" # HANGUL JUNGSEONG I
|
841
|
+
- pattern: "ᅢ"
|
842
|
+
result: "ae" # HANGUL JUNGSEONG AE
|
843
|
+
- pattern: "ᅤ"
|
844
|
+
result: "yae" # HANGUL JUNGSEONG YAE
|
845
|
+
- pattern: "ᅦ"
|
846
|
+
result: "e" # HANGUL JUNGSEONG E
|
847
|
+
- pattern: "ᅨ"
|
848
|
+
result: "ye" # HANGUL JUNGSEONG YE
|
849
|
+
- pattern: "ᅬ"
|
850
|
+
result: "oe" # HANGUL JUNGSEONG OE
|
851
|
+
- pattern: "ᅱ"
|
852
|
+
result: "wi" # HANGUL JUNGSEONG WI
|
853
|
+
- pattern: "ᅴ"
|
854
|
+
result: "üi" # HANGUL JUNGSEONG YI
|
855
|
+
- pattern: "ᅪ"
|
856
|
+
result: "wa" # HANGUL JUNGSEONG WA
|
857
|
+
- pattern: "ᅯ"
|
858
|
+
result: "wo" # HANGUL JUNGSEONG WEO
|
859
|
+
- pattern: "ᅫ"
|
860
|
+
result: "wae" # HANGUL JUNGSEONG WAE
|
861
|
+
- pattern: "ᅰ"
|
862
|
+
result: "we" # HANGUL JUNGSEONG WE
|
863
|
+
- pattern: "ᆨ(?=[ A-Za-z0-9-])"
|
864
|
+
result: "k" # HANGUL JONGSEONG KIYEOK
|
865
|
+
- pattern: "ᆫ(?=[ A-Za-z0-9-])"
|
866
|
+
result: "n" # HANGUL JONGSEONG NIEUN
|
867
|
+
- pattern: "ᆮ(?=[ A-Za-z0-9-])"
|
868
|
+
result: "t" # HANGUL JONGSEONG TIEUT
|
869
|
+
- pattern: "ᆯ(?=[ A-Za-z0-9-])"
|
870
|
+
result: "l" # HANGUL JONGSEONG RIEUL
|
871
|
+
- pattern: "ᆷ(?=[ A-Za-z0-9-])"
|
872
|
+
result: "m" # HANGUL JONGSEONG MIEUM
|
873
|
+
- pattern: "ᆸ(?=[ A-Za-z0-9-])"
|
874
|
+
result: "p" # HANGUL JONGSEONG PIEUP
|
875
|
+
- pattern: "ᆺ(?=[ A-Za-z0-9-])"
|
876
|
+
result: "t" # HANGUL JONGSEONG SIOS
|
877
|
+
- pattern: "ᆼ(?=[ A-Za-z0-9-])"
|
878
|
+
result: "ng" # HANGUL JONGSEONG IEUNG
|
879
|
+
- pattern: "ᆽ(?=[ A-Za-z0-9-])"
|
880
|
+
result: "t" # HANGUL JONGSEONG CIEUC
|
881
|
+
- pattern: "ᆾ(?=[ A-Za-z0-9-])"
|
882
|
+
result: "t" # HANGUL JONGSEONG CHIEUCH
|
883
|
+
- pattern: "ᆿ(?=[ A-Za-z0-9-])"
|
884
|
+
result: "k" # HANGUL JONGSEONG KHIEUKH
|
885
|
+
- pattern: "ᇀ(?=[ A-Za-z0-9-])"
|
886
|
+
result: "t" # HANGUL JONGSEONG THIEUTH
|
887
|
+
- pattern: "ᇁ(?=[ A-Za-z0-9-])"
|
888
|
+
result: "p" # HANGUL JONGSEONG PHIEUPH
|
889
|
+
- pattern: "ᆰ(?=[ A-Za-z0-9-])"
|
890
|
+
result: "k" # HANGUL JONGSEONG RIEUL-KIYEOK
|
891
|
+
- pattern: "ᆲ(?=[ A-Za-z0-9-])"
|
892
|
+
result: "p" # HANGUL JONGSEONG RIEUL-PIEUP
|
893
|
+
|
894
|
+
# Remove space added
|
895
|
+
- pattern: "^ "
|
896
|
+
result: ""
|
897
|
+
- pattern: " $"
|
898
|
+
result: ""
|
899
|
+
|
900
|
+
characters:
|
901
|
+
# This is based on Jamo
|