interscript 0.1.1 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +250 -17
- data/bin/interscript +38 -17
- data/bin/setup +8 -0
- data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
- data/lib/g2pwrapper.py +34 -0
- data/lib/interscript-opal.rb +2 -0
- data/lib/interscript.rb +138 -20
- data/lib/interscript/command.rb +28 -0
- data/lib/interscript/fs.rb +71 -0
- data/lib/interscript/mapping.rb +142 -0
- data/lib/interscript/opal.rb +27 -0
- data/lib/interscript/opal/maps.js.erb +10 -0
- data/lib/interscript/opal_map_translate.rb +12 -0
- data/lib/interscript/version.rb +1 -1
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
- data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
- data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
- data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
- data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +141 -0
- data/maps/alalc-bel-Cyrl-Latn-1997.yaml +125 -0
- data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
- data/maps/alalc-ell-Grek-Latn-1997.yaml +624 -0
- data/maps/alalc-ell-Grek-Latn-2010.yaml +627 -0
- data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +111 -0
- data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
- data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
- data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
- data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +114 -0
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
- data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +221 -0
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
- data/maps/alalc-srp-Cyrl-Latn-2013.yaml +135 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +174 -0
- data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +169 -0
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +292 -0
- data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
- data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
- data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +108 -0
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +184 -0
- data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +285 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +38 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +701 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +19 -0
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +42 -0
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
- data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
- data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +92 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +314 -0
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +162 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
- data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
- data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
- data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
- data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
- data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
- data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
- data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
- data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
- data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
- data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
- data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
- data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +684 -0
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +680 -0
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +19 -0
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +31 -0
- data/maps/ggg-kat-Geor-Latn-2002.yaml +88 -0
- data/maps/gki-bel-Cyrl-Latn-1992.yaml +33 -0
- data/maps/gki-bel-Cyrl-Latn-2000.yaml +201 -0
- data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +186 -0
- data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +136 -0
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +118 -0
- data/maps/icao-heb-Hebr-Latn-9303.yaml +151 -0
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +117 -0
- data/maps/icao-per-Arab-Latn-9303.yaml +103 -0
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +117 -0
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +117 -0
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +119 -0
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +609 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +40 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +271 -0
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
- data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
- data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
- data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
- data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +87 -0
- data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
- data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
- data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
- data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
- data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
- data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
- data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
- data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
- data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
- data/maps/sac-zho-Hans-Latn-1979.yaml +24759 -0
- data/maps/ses-ara-Arab-Latn-1930.yaml +279 -0
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
- data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
- data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
- data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
- data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
- data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +31 -0
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +19 -0
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
- data/maps/un-mon-Mong-Latn-2013.yaml +99 -0
- data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
- data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
- data/maps/un-ukr-Cyrl-Latn-1998.yaml +30 -0
- data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
- data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +36 -0
- data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
- data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
- data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
- data/spec/interscript/mapping_spec.rb +42 -0
- data/spec/interscript_spec.rb +26 -0
- data/spec/spec_helper.rb +3 -0
- metadata +298 -18
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: 1996
|
|
4
|
+
language: ell
|
|
5
|
+
source_script: Grek
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: BGN/PCGN 1996 System
|
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693694/ROMANIZATION_OF_GREEK.pdf
|
|
9
|
+
creation_date: 1996
|
|
10
|
+
description: |
|
|
11
|
+
BGN/PCGN Romanization table for Greek
|
|
12
|
+
|
|
13
|
+
note:
|
|
14
|
+
- Identical to ELOT 743:1982, which is also adopted as ISO 843:1997 and by UNGEGN
|
|
15
|
+
|
|
16
|
+
map:
|
|
17
|
+
character_separator: ""
|
|
18
|
+
word_separator: " "
|
|
19
|
+
inherit: "elot-ell-Grek-Latn-743-1982-ts"
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: 1930
|
|
4
|
+
language: jpn
|
|
5
|
+
source_script: Hrkt
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Japanese Kana Modified Hepburn 1930 System
|
|
8
|
+
url:
|
|
9
|
+
creation_date:
|
|
10
|
+
adoption_date:
|
|
11
|
+
description:
|
|
12
|
+
|
|
13
|
+
notes:
|
|
14
|
+
- Segmentation needs to be done before using this map
|
|
15
|
+
- Note 5 in the specification states that when identical repeating vowels
|
|
16
|
+
belong to different kanji characters, they shall be romanized
|
|
17
|
+
individually and an apostrophe (’) shall be placed between the vowels.
|
|
18
|
+
However, since this is a map from Kana to Hepburn, there is no way to
|
|
19
|
+
implement this feature.
|
|
20
|
+
- The documentation did not specify how the hyphen should be used.
|
|
21
|
+
|
|
22
|
+
tests:
|
|
23
|
+
|
|
24
|
+
- source: てがた-からみでん
|
|
25
|
+
expected: "Tegata-karamiden"
|
|
26
|
+
- source: てがた-すみよしちょう
|
|
27
|
+
expected: "Tegata-sumiyoshichō"
|
|
28
|
+
- source: さいのはま
|
|
29
|
+
expected: "Sainohama"
|
|
30
|
+
- source: てがた-たなか
|
|
31
|
+
expected: "Tegata-tanaka"
|
|
32
|
+
- source: ほりおでん
|
|
33
|
+
expected: "Horioden"
|
|
34
|
+
- source: そえがわ
|
|
35
|
+
expected: "Soegawa"
|
|
36
|
+
- source: ふねがさわ
|
|
37
|
+
expected: "Funegasawa"
|
|
38
|
+
- source: とくまんだて
|
|
39
|
+
expected: "Tokumandate"
|
|
40
|
+
- source: たてない
|
|
41
|
+
expected: "Tatenai"
|
|
42
|
+
- source: つるがさき
|
|
43
|
+
expected: "Tsurugasaki"
|
|
44
|
+
- source: しもやつせ
|
|
45
|
+
expected: "Shimoyatsuse"
|
|
46
|
+
- source: かみやつせ
|
|
47
|
+
expected: "Kamiyatsuse"
|
|
48
|
+
- source: しんとうだ
|
|
49
|
+
expected: "Shintōda"
|
|
50
|
+
- source: かじのめ
|
|
51
|
+
expected: "Kajinome"
|
|
52
|
+
- source: まえぎ
|
|
53
|
+
expected: "Maegi"
|
|
54
|
+
- source: くろさわ やま
|
|
55
|
+
expected: "Kurosawa Yama"
|
|
56
|
+
- source: いちのさわ がわ
|
|
57
|
+
expected: "Ichinosawa Gawa"
|
|
58
|
+
- source: はちやまえ
|
|
59
|
+
expected: "Hachiyamae"
|
|
60
|
+
- source: やち
|
|
61
|
+
expected: "Yachi"
|
|
62
|
+
- source: たてぬま
|
|
63
|
+
expected: "Tatenuma"
|
|
64
|
+
- source: しらはま
|
|
65
|
+
expected: "Shirahama"
|
|
66
|
+
- source: けせんまち
|
|
67
|
+
expected: "Kesenmachi"
|
|
68
|
+
- source: けいだい-かわら
|
|
69
|
+
expected: "Keidai-kawara"
|
|
70
|
+
- source: いしやました
|
|
71
|
+
expected: "Ishiyamashita"
|
|
72
|
+
- source: なえひら-やち
|
|
73
|
+
expected: "Naehira-yachi"
|
|
74
|
+
- source: とみの
|
|
75
|
+
expected: "Tomino"
|
|
76
|
+
- source: あらや-たかみまち
|
|
77
|
+
expected: "Araya-takamimachi"
|
|
78
|
+
- source: ながた
|
|
79
|
+
expected: "Nagata"
|
|
80
|
+
- source: とどろき おんせん
|
|
81
|
+
expected: "Todoroki Onsen"
|
|
82
|
+
- source: かしわぎはら
|
|
83
|
+
expected: "Kashiwagihara"
|
|
84
|
+
- source: とやけもり やま
|
|
85
|
+
expected: "Toyakemori Yama"
|
|
86
|
+
- source: なかさい
|
|
87
|
+
expected: "Nakasai"
|
|
88
|
+
- source: たけした
|
|
89
|
+
expected: "Takeshita"
|
|
90
|
+
- source: みと
|
|
91
|
+
expected: "Mito"
|
|
92
|
+
- source: みなみなかさと
|
|
93
|
+
expected: "Minaminakasato"
|
|
94
|
+
- source: みずおし
|
|
95
|
+
expected: "Mizuoshi"
|
|
96
|
+
- source: なかさと
|
|
97
|
+
expected: "Nakasato"
|
|
98
|
+
- source: しんかりば
|
|
99
|
+
expected: "Shinkariba"
|
|
100
|
+
- source: しんかみぬま
|
|
101
|
+
expected: "Shinkaminuma"
|
|
102
|
+
- source: しんばし
|
|
103
|
+
expected: "Shinbashi"
|
|
104
|
+
- source: りくぜんやました えき
|
|
105
|
+
expected: "Rikuzen’yamashita Eki"
|
|
106
|
+
- source: うしじまにし
|
|
107
|
+
expected: "Ushijimanishi"
|
|
108
|
+
- source: はまえば
|
|
109
|
+
expected: "Hamaeba"
|
|
110
|
+
- source: ぬまむかい
|
|
111
|
+
expected: "Numamukai"
|
|
112
|
+
- source: さんげんやち
|
|
113
|
+
expected: "Sangen’yachi"
|
|
114
|
+
- source: にけんやち
|
|
115
|
+
expected: "Niken’yachi"
|
|
116
|
+
- source: やちなか
|
|
117
|
+
expected: "Yachinaka"
|
|
118
|
+
- source: なす がわ
|
|
119
|
+
expected: "Nasu Gawa"
|
|
120
|
+
- source: おおはらはま
|
|
121
|
+
expected: "Ōharahama"
|
|
122
|
+
- source: うるご がわ
|
|
123
|
+
expected: "Urugo Gawa"
|
|
124
|
+
- source: なかばせ
|
|
125
|
+
expected: "Nakabase"
|
|
126
|
+
- source: うと えき
|
|
127
|
+
expected: "Uto Eki"
|
|
128
|
+
- source: みずまち
|
|
129
|
+
expected: "Mizumachi"
|
|
130
|
+
- source: ごんげんどう
|
|
131
|
+
expected: "Gongendō"
|
|
132
|
+
- source: いとひさ
|
|
133
|
+
expected: "Itohisa"
|
|
134
|
+
- source: あらおい
|
|
135
|
+
expected: "Araoi"
|
|
136
|
+
- source: わんめ
|
|
137
|
+
expected: "Wanme"
|
|
138
|
+
- source: かじろ
|
|
139
|
+
expected: "Kajiro"
|
|
140
|
+
- source: みやばら
|
|
141
|
+
expected: "Miyabara"
|
|
142
|
+
- source: いまどみ
|
|
143
|
+
expected: "Imadomi"
|
|
144
|
+
- source: かいほ
|
|
145
|
+
expected: "Kaiho"
|
|
146
|
+
- source: かいほ ぼえん
|
|
147
|
+
expected: "Kaiho Boen"
|
|
148
|
+
- source: ひきだ
|
|
149
|
+
expected: "Hikida"
|
|
150
|
+
- source: あさい-こむかい
|
|
151
|
+
expected: "Asai-komukai"
|
|
152
|
+
- source: こうざか
|
|
153
|
+
expected: "Kōzaka"
|
|
154
|
+
- source: こうふうだい
|
|
155
|
+
expected: "Kōfūdai"
|
|
156
|
+
- source: たての
|
|
157
|
+
expected: "Tateno"
|
|
158
|
+
- source: センター
|
|
159
|
+
expected: "Sentā"
|
|
160
|
+
- source: フィリピン
|
|
161
|
+
expected: "Firipin"
|
|
162
|
+
- source: ヴィオリン
|
|
163
|
+
expected: "Viorin"
|
|
164
|
+
- source: クォーター
|
|
165
|
+
expected: "Kwōtā"
|
|
166
|
+
- source: パッチリ
|
|
167
|
+
expected: "Patchiri"
|
|
168
|
+
- source: ぽっぽっや
|
|
169
|
+
expected: "Poppoyya"
|
|
170
|
+
|
|
171
|
+
map:
|
|
172
|
+
character_separator: ""
|
|
173
|
+
word_separator: " "
|
|
174
|
+
title_case: True
|
|
175
|
+
inherit: var-jpn-Hrkt-Latn-hepburn-1954
|
|
176
|
+
|
|
177
|
+
characters:
|
|
178
|
+
# Rare sounds, Table 2 & 4
|
|
179
|
+
|
|
180
|
+
"くぁ": "kwa"
|
|
181
|
+
"クァ": "kwa"
|
|
182
|
+
"ぐぁ": "gwa"
|
|
183
|
+
"グァ": "gwa"
|
|
184
|
+
"くぃ": "kwi"
|
|
185
|
+
"クィ": "kwi"
|
|
186
|
+
"ぐぃ": "gwi"
|
|
187
|
+
"グィ": "gwi"
|
|
188
|
+
"きぇ": "kye"
|
|
189
|
+
"キェ": "kye"
|
|
190
|
+
"ぎぇ": "gye"
|
|
191
|
+
"ギェ": "gye"
|
|
192
|
+
"くぇ": "kwe"
|
|
193
|
+
"クェ": "kwe"
|
|
194
|
+
"ぐぇ": "gwe"
|
|
195
|
+
"グェ": "gwe"
|
|
196
|
+
"くぉ": "kwo"
|
|
197
|
+
"クォ": "kwo"
|
|
198
|
+
"ぐぉ": "gwo"
|
|
199
|
+
"グォ": "gwo"
|
|
200
|
+
"しぇ": "she"
|
|
201
|
+
"シェ": "she"
|
|
202
|
+
"じぇ": "je"
|
|
203
|
+
"ジェ": "je"
|
|
204
|
+
"つぁ": "tsa"
|
|
205
|
+
"ツァ": "tsa"
|
|
206
|
+
"てぃ": "ti"
|
|
207
|
+
"ティ": "ti"
|
|
208
|
+
"でぃ": "di"
|
|
209
|
+
"ディ": "di"
|
|
210
|
+
"てゅ": "tyu"
|
|
211
|
+
"テュ": "tyu"
|
|
212
|
+
"でゅ": "dyu"
|
|
213
|
+
"デュ": "dyu"
|
|
214
|
+
"とゅ": "tu"
|
|
215
|
+
"トュ": "tu"
|
|
216
|
+
"どゅ": "du"
|
|
217
|
+
"ドュ": "du"
|
|
218
|
+
"ちぇ": "che"
|
|
219
|
+
"チェ": "che"
|
|
220
|
+
"ぢぇ": "je"
|
|
221
|
+
"ヂェ": "je"
|
|
222
|
+
"つぇ": "tse"
|
|
223
|
+
"ツェ": "tse"
|
|
224
|
+
"つぉ": "tso"
|
|
225
|
+
"ツォ": "tso"
|
|
226
|
+
"にぇ": "nye"
|
|
227
|
+
"ニェ": "nye"
|
|
228
|
+
"ふぁ": "fa"
|
|
229
|
+
"ファ": "fa"
|
|
230
|
+
"ふぃ": "fi"
|
|
231
|
+
"フィ": "fi"
|
|
232
|
+
"ふぇ": "fe"
|
|
233
|
+
"フェ": "fe"
|
|
234
|
+
"ふぉ": "fo"
|
|
235
|
+
"フォ": "fo"
|
|
236
|
+
"みぇ": "mye"
|
|
237
|
+
"ミェ": "mye"
|
|
238
|
+
"ぃぇ": "ye"
|
|
239
|
+
"ィェ": "ye"
|
|
240
|
+
"りぇ": "rye"
|
|
241
|
+
"リェ": "rye"
|
|
242
|
+
"ゔぁ": "va"
|
|
243
|
+
"ヴァ": "va"
|
|
244
|
+
"うぃ": "wi"
|
|
245
|
+
"ウィ": "wi"
|
|
246
|
+
"ゔぃ": "vi"
|
|
247
|
+
"ヴィ": "vi"
|
|
248
|
+
"うぇ": "we"
|
|
249
|
+
"ウェ": "we"
|
|
250
|
+
"ゔぇ": "ve"
|
|
251
|
+
"ヴェ": "ve"
|
|
252
|
+
"うぉ": "wo"
|
|
253
|
+
"ウォ": "wo"
|
|
254
|
+
"ゔぉ": "vo"
|
|
255
|
+
"ヴォ": "vo"
|
|
256
|
+
"ゔ": "vu"
|
|
257
|
+
"ヴ": "vu"
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: 1981
|
|
4
|
+
language: kat
|
|
5
|
+
source_script: Geor
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: ROMANIZATION OF GEORGIAN; BGN/PCGN 1981 System
|
|
8
|
+
url: https://transliteration.eki.ee/pdf/Georgian.pdf
|
|
9
|
+
creation_date: 1981
|
|
10
|
+
confirmation_date: 1981
|
|
11
|
+
description: |
|
|
12
|
+
BGN/PCGN system of 1981.
|
|
13
|
+
|
|
14
|
+
notes:
|
|
15
|
+
|
|
16
|
+
tests:
|
|
17
|
+
- source: ჰებუდი
|
|
18
|
+
expected: hebudi
|
|
19
|
+
|
|
20
|
+
- source: ჯვრის წყალსაცავი
|
|
21
|
+
expected: jvris tsqalsats’avi
|
|
22
|
+
|
|
23
|
+
- source: ჯვავიაკვარა
|
|
24
|
+
expected: jvaviak’vara
|
|
25
|
+
|
|
26
|
+
- source: ჯობრია
|
|
27
|
+
expected: jobria
|
|
28
|
+
|
|
29
|
+
- source: ძულუხირა
|
|
30
|
+
expected: dzulukhira
|
|
31
|
+
|
|
32
|
+
- source: ლეკუხონა
|
|
33
|
+
expected: lek’ukhona
|
|
34
|
+
|
|
35
|
+
- source: აბაშა
|
|
36
|
+
expected: abasha
|
|
37
|
+
|
|
38
|
+
- source: ააცი
|
|
39
|
+
expected: aats’i
|
|
40
|
+
|
|
41
|
+
# TODO: Determine which romanization system this example belongs to.
|
|
42
|
+
# - source: აბააჟახვუ
|
|
43
|
+
# expected: abaazhvakhu
|
|
44
|
+
|
|
45
|
+
# TODO: These examples from GNDB are clearly using the BGNPCGN 2009 system
|
|
46
|
+
#
|
|
47
|
+
# - source: ხობის მუნიციპალიტეტი
|
|
48
|
+
# expected: khobis munitsip’alit’et’i
|
|
49
|
+
#
|
|
50
|
+
# - source: მყინვარი ჩრდილოეთი ლეადაშატი
|
|
51
|
+
# expected: mq’invari chrdiloeti leadashat’i
|
|
52
|
+
#
|
|
53
|
+
# - source: ხეწკვარა
|
|
54
|
+
# expected: khets’k’vara
|
|
55
|
+
#
|
|
56
|
+
# - source: ჯამპალი
|
|
57
|
+
# expected: jamp’ali
|
|
58
|
+
#
|
|
59
|
+
# - source: ჯავის მუნიციპალიტეტი
|
|
60
|
+
# expected: javis munitsip’alit’et’i
|
|
61
|
+
#
|
|
62
|
+
# - source: ხოიჯგეთა
|
|
63
|
+
# expected: khoijgeta
|
|
64
|
+
#
|
|
65
|
+
# - source: ხობის მუნიციპალიტეტი
|
|
66
|
+
# expected: khobis munitsip’alit’et’i
|
|
67
|
+
#
|
|
68
|
+
# - source: წვიშარხუ
|
|
69
|
+
# expected: ts’visharkhu
|
|
70
|
+
# - source: აღმოსავლეთი გუმისთა
|
|
71
|
+
# expected: aghmosavleti gumista
|
|
72
|
+
#
|
|
73
|
+
# - source: ქვემო ბირცხა
|
|
74
|
+
# expected: kvemo birtskha
|
|
75
|
+
#
|
|
76
|
+
# - source: ზემო ბირცხა
|
|
77
|
+
# expected: zemo birtskha
|
|
78
|
+
#
|
|
79
|
+
# - source: აბჟაყვა
|
|
80
|
+
# expected: abzhaq’va
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
map:
|
|
85
|
+
characters:
|
|
86
|
+
'\u10d0' : 'a' # ა
|
|
87
|
+
'\u10d1' : 'b' # ბ
|
|
88
|
+
'\u10d2' : 'g' # გ
|
|
89
|
+
'\u10d3' : 'd' # დ
|
|
90
|
+
'\u10d4' : 'e' # ე
|
|
91
|
+
'\u10d5' : 'v' # ვ
|
|
92
|
+
'\u10d6' : 'z' # ზ
|
|
93
|
+
|
|
94
|
+
'\u10f1' : 'ey' # ჱ
|
|
95
|
+
|
|
96
|
+
'\u10d7' : 't’' # თ
|
|
97
|
+
'\u10d8' : 'i' # ი
|
|
98
|
+
'\u10d9' : 'k’' # კ
|
|
99
|
+
'\u10da' : 'l' # ლ
|
|
100
|
+
'\u10db' : 'm' # მ
|
|
101
|
+
'\u10dc' : 'n' # ნ
|
|
102
|
+
|
|
103
|
+
'\u10f2' : 'j' # ჲ
|
|
104
|
+
|
|
105
|
+
'\u10dd' : 'o' # ო
|
|
106
|
+
'\u10de' : 'p' # პ
|
|
107
|
+
'\u10df' : 'zh' # ჟ
|
|
108
|
+
'\u10e0' : 'r' # რ
|
|
109
|
+
'\u10e1' : 's' # ს
|
|
110
|
+
'\u10e2' : 't' # ტ
|
|
111
|
+
'\u10e3' : 'u' # უ
|
|
112
|
+
'\u10e4' : 'p’' # ფ
|
|
113
|
+
'\u10e5' : 'k’' # ქ
|
|
114
|
+
'\u10e6' : 'gh' # ღ
|
|
115
|
+
'\u10e7' : 'q' # ყ
|
|
116
|
+
'\u10e8' : 'sh' # შ
|
|
117
|
+
'\u10e9' : 'ch’' # ჩ
|
|
118
|
+
'\u10ea' : 'ts’' # ც
|
|
119
|
+
'\u10eb' : 'dz' # ძ
|
|
120
|
+
'\u10ec' : 'ts' # წ
|
|
121
|
+
'\u10ed' : 'ch' # ჭ
|
|
122
|
+
'\u10ee' : 'kh' # ხ
|
|
123
|
+
|
|
124
|
+
'\u10f4' : 'q’' # ჴ
|
|
125
|
+
|
|
126
|
+
'\u10ef' : 'j' # ჯ
|
|
127
|
+
'\u10f0' : 'h' # ჰ
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: 2009
|
|
4
|
+
language: kat
|
|
5
|
+
source_script: Geor
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: ROMANIZATION OF GEORGIAN; Georgia 2011 national system; BGN/PCGN 2009 Agreement
|
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/810202/ROMANIZATION_OF_GEORGIAN.pdf
|
|
9
|
+
creation_date: 2009
|
|
10
|
+
confirmation_date: 2011
|
|
11
|
+
description: |
|
|
12
|
+
This romanization system corresponds to that devised in 2002 by the
|
|
13
|
+
State Department of Geodesy and Cartography of Georgia and the
|
|
14
|
+
Institute of Linguistics of the Georgian Academy of Sciences, and
|
|
15
|
+
approved by Presidential Decree 109 of 24 February 2011. It represents
|
|
16
|
+
the Mkhedruli alphabet, as presently used in Georgia. This system was
|
|
17
|
+
adopted by BGN and PCGN in 2009, superseding the BGN/PCGN system of
|
|
18
|
+
1981.
|
|
19
|
+
|
|
20
|
+
notes:
|
|
21
|
+
- This system denotes ejective (glottalised) consonants by means of
|
|
22
|
+
an apostrophe. The BGN/PCGN 1981 system instead used the apostrophe to
|
|
23
|
+
denote aspirated consonants (letters 8, 21, 22, 26 & 27).
|
|
24
|
+
- The Romanization columns show only lowercase forms but, when
|
|
25
|
+
romanizing, uppercase and lowercase Roman letters as appropriate should
|
|
26
|
+
be used.
|
|
27
|
+
|
|
28
|
+
tests:
|
|
29
|
+
- source: თბილისი
|
|
30
|
+
expected: tbilisi
|
|
31
|
+
|
|
32
|
+
- source: მეღვინეთუხუცესი
|
|
33
|
+
expected: meghvinetukhutsesi
|
|
34
|
+
|
|
35
|
+
- source: ჭიანჭველა
|
|
36
|
+
expected: ch’ianch’vela
|
|
37
|
+
|
|
38
|
+
- source: ბაყაყი
|
|
39
|
+
expected: baq’aq’i
|
|
40
|
+
|
|
41
|
+
map:
|
|
42
|
+
inherit: "ggg-kat-Geor-Latn-2002"
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: kn-1945
|
|
4
|
+
language: kor
|
|
5
|
+
source_script: Hang
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: BGN/PCGN 1945 Agreement
|
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693725/ROMANIZATION_OF_KOREAN-_MR_for_DPRK.pdf
|
|
9
|
+
creation_date: 1945
|
|
10
|
+
adoption_date:
|
|
11
|
+
description:
|
|
12
|
+
|
|
13
|
+
notes: "
|
|
14
|
+
|
|
15
|
+
1. At the end of a syllable, the character ᄋ should be romanized ng,
|
|
16
|
+
as in the following example:
|
|
17
|
+
|
|
18
|
+
평양 → P’yŏngyang
|
|
19
|
+
|
|
20
|
+
At the beginning of a syllable, the character ᄋ is silent and
|
|
21
|
+
should not be romanized. An example follows:
|
|
22
|
+
|
|
23
|
+
용화 → Yonghwa
|
|
24
|
+
|
|
25
|
+
2. Syllable boundaries within words are not reflected in romanization.
|
|
26
|
+
In the different types of syllables shown in the table below, C
|
|
27
|
+
represents any consonant character, V represents any vowel character
|
|
28
|
+
and / represents a syllable boundary.
|
|
29
|
+
|
|
30
|
+
Han’gŭl 개성 남포 안양
|
|
31
|
+
Syllable boundaries CV/CVC CVC/CV VC/VC
|
|
32
|
+
Romanization Kaesŏng Namp’o Anyang
|
|
33
|
+
|
|
34
|
+
3. Euphonic changes occurring within a word, including between the
|
|
35
|
+
specific and generic of a geographical name, should be reflected in
|
|
36
|
+
romanization. Generic terms are usually seen separated from the name
|
|
37
|
+
by a hyphen and with a lower case initial letter rather than as a
|
|
38
|
+
separate word:
|
|
39
|
+
|
|
40
|
+
영진리 → Yŏngjil-li
|
|
41
|
+
덕흥리 → Tŏkhŭng-ni
|
|
42
|
+
압록강 → Amnok-kang
|
|
43
|
+
대동강 → Taedong-gang
|
|
44
|
+
|
|
45
|
+
4. As a result of 조선말규범집 (‘Standard Korean Language’ guidelines
|
|
46
|
+
published in North Korea in 1966), unlike the Korean spoken in the
|
|
47
|
+
Republic of Korea, the language spoken in the Democratic People’s
|
|
48
|
+
Republic of Korea maintains and pronounces the word-initial ᄅ (‘r’).
|
|
49
|
+
The use of the word-initial ᄅ ('r') can be seen in official news
|
|
50
|
+
reports as well as native mapping. Since such examples exist, the
|
|
51
|
+
word-initial ᄅ ('r') is reflected as an option in the tables given above.
|
|
52
|
+
|
|
53
|
+
5. The Romanization column shows only lowercase forms but, when romanizing,
|
|
54
|
+
uppercase and lowercase Roman letters as appropriate should be used.
|
|
55
|
+
"
|
|
56
|
+
|
|
57
|
+
tests:
|
|
58
|
+
- source: "평양"
|
|
59
|
+
expected: "P’yŏngyang"
|
|
60
|
+
- source: "용화"
|
|
61
|
+
expected: "Yonghwa"
|
|
62
|
+
- source: "개성"
|
|
63
|
+
expected: "Kaesŏng"
|
|
64
|
+
- source: "남포"
|
|
65
|
+
expected: "Namp’o"
|
|
66
|
+
- source: "안양"
|
|
67
|
+
expected: "Anyang"
|
|
68
|
+
- source: "영진-리"
|
|
69
|
+
expected: "Yŏngjil-li"
|
|
70
|
+
- source: "덕흥-리"
|
|
71
|
+
expected: "Tŏkhŭng-ni"
|
|
72
|
+
- source: "압록-강"
|
|
73
|
+
expected: "Amnok-kang"
|
|
74
|
+
- source: "대동-강"
|
|
75
|
+
expected: "Taedong-gang"
|
|
76
|
+
- source: "라선특별시"
|
|
77
|
+
expected: "Rasŏnt’ŭkpyŏlsi"
|
|
78
|
+
- source: 은하-리
|
|
79
|
+
expected: "Ŭnha-ri"
|
|
80
|
+
- source: 은중-리
|
|
81
|
+
expected: "Ŭnjung-ni"
|
|
82
|
+
- source: 은장-령
|
|
83
|
+
expected: "Ŭnjang-nyŏng"
|
|
84
|
+
- source: 은혜-동
|
|
85
|
+
expected: "Ŭnhye-dong"
|
|
86
|
+
- source: 은호-리
|
|
87
|
+
expected: "Ŭnho-ri"
|
|
88
|
+
- source: 은행정
|
|
89
|
+
expected: "Ŭnhaengjŏng"
|
|
90
|
+
- source: 은행-동
|
|
91
|
+
expected: "Ŭnhaeng-dong"
|
|
92
|
+
- source: 은행-촌
|
|
93
|
+
expected: "Ŭnhaeng-ch’on"
|
|
94
|
+
- source: 원수
|
|
95
|
+
expected: "Wŏnsu"
|
|
96
|
+
- source: 원소리-고개
|
|
97
|
+
expected: "Wŏnsori-gogae"
|
|
98
|
+
- source: 원소참
|
|
99
|
+
expected: "Wŏnsoch’am"
|
|
100
|
+
- source: 원소-리
|
|
101
|
+
expected: "Wŏnso-ri"
|
|
102
|
+
- source: 원신-리
|
|
103
|
+
expected: "Wŏnsil-li"
|
|
104
|
+
- source: 난곡
|
|
105
|
+
expected: "Nan’gok"
|
|
106
|
+
- source: 난산-리
|
|
107
|
+
expected: "Nansal-li"
|
|
108
|
+
- source: 난직
|
|
109
|
+
expected: "Nanjik"
|
|
110
|
+
- source: 영곡
|
|
111
|
+
expected: "Yŏnggok"
|
|
112
|
+
- source: 윗두밀
|
|
113
|
+
expected: "Wittumil"
|
|
114
|
+
- source: 윗도심이
|
|
115
|
+
expected: "Wittosimi"
|
|
116
|
+
- source: 둔지
|
|
117
|
+
expected: "Tunji"
|
|
118
|
+
- source: 서승
|
|
119
|
+
expected: "Sŏsŭng"
|
|
120
|
+
- source: 신촌
|
|
121
|
+
expected: "Sinch’on"
|
|
122
|
+
- source: 비암덕
|
|
123
|
+
expected: "Piamdŏk"
|
|
124
|
+
- source: 바위안
|
|
125
|
+
expected: "Pawian"
|
|
126
|
+
- source: 오송평
|
|
127
|
+
expected: "Osongp’yŏng"
|
|
128
|
+
- source: 그물목
|
|
129
|
+
expected: "Kŭmulmok"
|
|
130
|
+
- source: 구원정
|
|
131
|
+
expected: "Kuwŏnjŏng"
|
|
132
|
+
- source: 일하
|
|
133
|
+
expected: "Irha"
|
|
134
|
+
- source: 황우
|
|
135
|
+
expected: "Hwangu"
|
|
136
|
+
- source: 자작보
|
|
137
|
+
expected: "Chajakpo"
|
|
138
|
+
- source: 비파1-동
|
|
139
|
+
expected: "Pip’a Il-tong"
|
|
140
|
+
- source: 문암 오-동
|
|
141
|
+
expected: "Munam O-dong"
|
|
142
|
+
|
|
143
|
+
map:
|
|
144
|
+
character_separator: ""
|
|
145
|
+
word_separator: " "
|
|
146
|
+
title_case: True
|
|
147
|
+
inherit: [var-kor-Hang-Latn-mr-1939]
|
|
148
|
+
|
|
149
|
+
rules:
|
|
150
|
+
# Insert a zero-width space (U+200B) at the start of the string and after each space (i.e. at every word start)
|
|
151
|
+
# So that the word-initial conversion rules will be blocked.
|
|
152
|
+
- pattern: "^"
|
|
153
|
+
result: "\u200B"
|
|
154
|
+
- pattern: "(?<= )"
|
|
155
|
+
result: "\u200B"
|
|
156
|
+
|
|
157
|
+
# convert numbers to space + Hangul
|
|
158
|
+
- pattern: "([^0-9 ])(?=[0-9])"
|
|
159
|
+
result: "\\1 "
|
|
160
|
+
- pattern: "1"
|
|
161
|
+
result: "일"
|
|
162
|
+
- pattern: "2"
|
|
163
|
+
result: "이"
|
|
164
|
+
- pattern: "3"
|
|
165
|
+
result: "삼"
|
|
166
|
+
- pattern: "4"
|
|
167
|
+
result: "사"
|
|
168
|
+
- pattern: "5"
|
|
169
|
+
result: "오"
|
|
170
|
+
- pattern: "6"
|
|
171
|
+
result: "육"
|
|
172
|
+
- pattern: "7"
|
|
173
|
+
result: "칠"
|
|
174
|
+
- pattern: "8"
|
|
175
|
+
result: "팔"
|
|
176
|
+
- pattern: "9"
|
|
177
|
+
result: "구"
|
|
178
|
+
|
|
179
|
+
# Logic to add a hyphen in front of generic terms (currently disabled)
|
|
180
|
+
# - pattern: "(?<=.)(구역|동|리|도|고개|골|로동자구|사무소|초등학교|중학교|고등학교|강|포|령|역|봉|사|천|교|제|저수지|소류지|재|못|말|면|암|교회|촌|병원|바위|공원|섬|우체국|대학교|보건소|굴|치|대교|지구|폭포|해수욕장|휴게소|중고교|읍|보건진료소|마을|톨게이트|대학|시장|경찰서|학교)$" #to be expanded
|
|
181
|
+
# result: "-\\1"
|
|
182
|
+
|
|
183
|
+
postrules:
|
|
184
|
+
|
|
185
|
+
# Add space to the two ends of the string for easier word boundary handling
|
|
186
|
+
- pattern: "^"
|
|
187
|
+
result: " "
|
|
188
|
+
- pattern: "$"
|
|
189
|
+
result: " "
|
|
190
|
+
|
|
191
|
+
# Initial rules in the inherited map were blocked, so that
|
|
192
|
+
# this set of updated rules (with the onset rules removed) will be used instead.
|
|
193
|
+
- pattern: "\u200B"
|
|
194
|
+
result: ""
|
|
195
|
+
|
|
196
|
+
- pattern: "(?<= )ᄀ"
|
|
197
|
+
result: "k" # HANGUL CHOSEONG KIYEOK
|
|
198
|
+
- pattern: "(?<= )ᄂ"
|
|
199
|
+
result: "n" # HANGUL CHOSEONG NIEUN
|
|
200
|
+
- pattern: "(?<= )ᄃ(?=[ᅵᅣᅤᅧᅨᅭᅲ])"
|
|
201
|
+
result: "ch" # HANGUL CHOSEONG TIEUT # T -> Ch before yotized vowels
|
|
202
|
+
- pattern: "(?<= )ᄃ"
|
|
203
|
+
result: "t" # HANGUL CHOSEONG TIEUT
|
|
204
|
+
- pattern: "(?<= )ᄅ"
|
|
205
|
+
result: "r" # HANGUL CHOSEONG RIEUL
|
|
206
|
+
- pattern: "(?<= )ᄆ"
|
|
207
|
+
result: "m" # HANGUL CHOSEONG MIEUM
|
|
208
|
+
- pattern: "(?<= )ᄇ"
|
|
209
|
+
result: "p" # HANGUL CHOSEONG PIEUP
|
|
210
|
+
- pattern: "(?<= )ᄉ(?=ᅱ)"
|
|
211
|
+
result: "sh" # HANGUL CHOSEONG SIOS
|
|
212
|
+
- pattern: "(?<= )ᄉ"
|
|
213
|
+
result: "s" # HANGUL CHOSEONG SIOS
|
|
214
|
+
- pattern: "(?<= )ᄋ"
|
|
215
|
+
result: "" # HANGUL CHOSEONG IEUNG
|
|
216
|
+
- pattern: "(?<= )ᄌ"
|
|
217
|
+
result: "ch" # HANGUL CHOSEONG CIEUC
|
|
218
|
+
- pattern: "(?<= )ᄎ"
|
|
219
|
+
result: "ch’" # HANGUL CHOSEONG CHIEUCH
|
|
220
|
+
- pattern: "(?<= )ᄏ"
|
|
221
|
+
result: "k’" # HANGUL CHOSEONG KHIEUKH
|
|
222
|
+
- pattern: "(?<= )ᄐ(?=[ᅵᅣᅤᅧᅨᅭᅲ])"
|
|
223
|
+
result: "ch’" # HANGUL CHOSEONG THIEUTH + YOTIZED VOWELS
|
|
224
|
+
- pattern: "(?<= )ᄐ"
|
|
225
|
+
result: "t’" # HANGUL CHOSEONG THIEUTH
|
|
226
|
+
- pattern: "(?<= )ᄑ"
|
|
227
|
+
result: "p’" # HANGUL CHOSEONG PHIEUPH
|
|
228
|
+
- pattern: "(?<= )ᄒ"
|
|
229
|
+
result: "h" # HANGUL CHOSEONG HIEUH
|
|
230
|
+
- pattern: "(?<= )ᄁ"
|
|
231
|
+
result: "kk" # HANGUL CHOSEONG SSANGKIYEOK
|
|
232
|
+
- pattern: "(?<= )ᄭ"
|
|
233
|
+
result: "kk" # HANGUL CHOSEONG SIOS-KIYEOK
|
|
234
|
+
- pattern: "(?<= )ᄄ"
|
|
235
|
+
result: "tt" # HANGUL CHOSEONG SSANGTIEUT
|
|
236
|
+
- pattern: "(?<= )ᄯ"
|
|
237
|
+
result: "tt" # HANGUL CHOSEONG SIOS-TIEUT
|
|
238
|
+
- pattern: "(?<= )ᄈ"
|
|
239
|
+
result: "pp" # HANGUL CHOSEONG SSANGPIEUP
|
|
240
|
+
- pattern: "(?<= )ᄲ"
|
|
241
|
+
result: "pp" # HANGUL CHOSEONG SIOS-PIEUP
|
|
242
|
+
- pattern: "(?<= )ᄊ"
|
|
243
|
+
result: "ss" # HANGUL CHOSEONG SSANGSIOS
|
|
244
|
+
- pattern: "(?<= )ᄍ"
|
|
245
|
+
result: "tch" # HANGUL CHOSEONG SSANGCIEUC
|
|
246
|
+
- pattern: "(?<= )ᄶ"
|
|
247
|
+
result: "tch" # HANGUL CHOSEONG SIOS-CIEUC
|
|
248
|
+
|
|
249
|
+
# Remove space added
|
|
250
|
+
- pattern: "^ "
|
|
251
|
+
result: ""
|
|
252
|
+
- pattern: " $"
|
|
253
|
+
result: ""
|