interscript 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +246 -14
- data/bin/interscript +38 -17
- data/bin/setup +8 -0
- data/lib/g2pwrapper.py +34 -0
- data/lib/interscript.rb +140 -16
- data/lib/interscript/command.rb +27 -0
- data/lib/interscript/mapping.rb +125 -0
- data/lib/interscript/version.rb +1 -1
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
- data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
- data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
- data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
- data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
- data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
- data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
- data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
- data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
- data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
- data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
- data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
- data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
- data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
- data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
- data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
- data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
- data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
- data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
- data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
- data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
- data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
- data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
- data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
- data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
- data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
- data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
- data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
- data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
- data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
- data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
- data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
- data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
- data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
- data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
- data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
- data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
- data/spec/interscript/mapping_spec.rb +42 -0
- data/spec/interscript_spec.rb +20 -5
- data/spec/spec_helper.rb +3 -1
- metadata +149 -24
- data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
- data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
- data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
- data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
- data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
- data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
- data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: kn-1945
|
|
4
|
+
language: kor
|
|
5
|
+
source_script: Hang
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: BGN/PCGN 1945 Agreement
|
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693725/ROMANIZATION_OF_KOREAN-_MR_for_DPRK.pdf
|
|
9
|
+
creation_date: 1945
|
|
10
|
+
adoption_date:
|
|
11
|
+
description:
|
|
12
|
+
|
|
13
|
+
notes: "
|
|
14
|
+
|
|
15
|
+
1. At the end of a syllable, the character ᄋ should be romanized ng,
|
|
16
|
+
as in the following example:
|
|
17
|
+
|
|
18
|
+
평양 → P’yŏngyang
|
|
19
|
+
|
|
20
|
+
At the beginning of a syllable, the character ᄋ is silent and
|
|
21
|
+
should not be romanized. An example follows:
|
|
22
|
+
|
|
23
|
+
용화 → Yonghwa
|
|
24
|
+
|
|
25
|
+
2. Syllable boundaries within words are not reflected in romanization.
|
|
26
|
+
In the different types of syllables shown in the table below, C
|
|
27
|
+
represents any consonant character, V represents any vowel character
|
|
28
|
+
and / represents a syllable boundary.
|
|
29
|
+
|
|
30
|
+
Han’gŭl 개성 남포 안양
|
|
31
|
+
Syllable boundaries CV/CVC CVC/CV VC/VC
|
|
32
|
+
Romanization Kaesŏng Namp’o Anyang
|
|
33
|
+
|
|
34
|
+
3. Euphonic changes occurring within a word, including between the
|
|
35
|
+
specific and generic of a geographical name, should be reflected in
|
|
36
|
+
romanization. Generic terms are usually seen separated from the name
|
|
37
|
+
by a hyphen and with a lower case initial letter rather than as a
|
|
38
|
+
separate word:
|
|
39
|
+
|
|
40
|
+
영진리 → Yŏngjil-li
|
|
41
|
+
덕흥리 → Tŏkhŭng-ni
|
|
42
|
+
압록강 → Amnok-kang
|
|
43
|
+
대동강 → Taedong-gang
|
|
44
|
+
|
|
45
|
+
4. As a result of 조선말규범집 (‘Standard Korean Language’ guidelines
|
|
46
|
+
published in North Korea in 1966), unlike the Korean spoken in the
|
|
47
|
+
Republic of Korea, the language spoken in the Democratic People’s
|
|
48
|
+
Republic of Korea maintains and pronounces the word-initial ᄅ (‘r’).
|
|
49
|
+
The use of the word-initial ᄅ ('r') can be seen in official news
|
|
50
|
+
reports as well as native mapping. Since such examples exist, the
|
|
51
|
+
word-initial ᄅ ('r') is reflected as an option in the tables given above.
|
|
52
|
+
|
|
53
|
+
5. The Romanization column shows only lowercase forms but, when romanizing,
|
|
54
|
+
uppercase and lowercase Roman letters as appropriate should be used.
|
|
55
|
+
"
|
|
56
|
+
|
|
57
|
+
tests:
|
|
58
|
+
- source: "평양"
|
|
59
|
+
expected: "P’yŏngyang"
|
|
60
|
+
- source: "용화"
|
|
61
|
+
expected: "Yonghwa"
|
|
62
|
+
- source: "개성"
|
|
63
|
+
expected: "Kaesŏng"
|
|
64
|
+
- source: "남포"
|
|
65
|
+
expected: "Namp’o"
|
|
66
|
+
- source: "안양"
|
|
67
|
+
expected: "Anyang"
|
|
68
|
+
- source: "영진-리"
|
|
69
|
+
expected: "Yŏngjil-li"
|
|
70
|
+
- source: "덕흥-리"
|
|
71
|
+
expected: "Tŏkhŭng-ni"
|
|
72
|
+
- source: "압록-강"
|
|
73
|
+
expected: "Amnok-kang"
|
|
74
|
+
- source: "대동-강"
|
|
75
|
+
expected: "Taedong-gang"
|
|
76
|
+
- source: "라선특별시"
|
|
77
|
+
expected: "Rasŏnt’ŭkpyŏlsi"
|
|
78
|
+
- source: 은하-리
|
|
79
|
+
expected: "Ŭnha-ri"
|
|
80
|
+
- source: 은중-리
|
|
81
|
+
expected: "Ŭnjung-ni"
|
|
82
|
+
- source: 은장-령
|
|
83
|
+
expected: "Ŭnjang-nyŏng"
|
|
84
|
+
- source: 은혜-동
|
|
85
|
+
expected: "Ŭnhye-dong"
|
|
86
|
+
- source: 은호-리
|
|
87
|
+
expected: "Ŭnho-ri"
|
|
88
|
+
- source: 은행정
|
|
89
|
+
expected: "Ŭnhaengjŏng"
|
|
90
|
+
- source: 은행-동
|
|
91
|
+
expected: "Ŭnhaeng-dong"
|
|
92
|
+
- source: 은행-촌
|
|
93
|
+
expected: "Ŭnhaeng-ch’on"
|
|
94
|
+
- source: 원수
|
|
95
|
+
expected: "Wŏnsu"
|
|
96
|
+
- source: 원소리-고개
|
|
97
|
+
expected: "Wŏnsori-gogae"
|
|
98
|
+
- source: 원소참
|
|
99
|
+
expected: "Wŏnsoch’am"
|
|
100
|
+
- source: 원소-리
|
|
101
|
+
expected: "Wŏnso-ri"
|
|
102
|
+
- source: 원신-리
|
|
103
|
+
expected: "Wŏnsil-li"
|
|
104
|
+
- source: 난곡
|
|
105
|
+
expected: "Nan’gok"
|
|
106
|
+
- source: 난산-리
|
|
107
|
+
expected: "Nansal-li"
|
|
108
|
+
- source: 난직
|
|
109
|
+
expected: "Nanjik"
|
|
110
|
+
- source: 영곡
|
|
111
|
+
expected: "Yŏnggok"
|
|
112
|
+
- source: 윗두밀
|
|
113
|
+
expected: "Wittumil"
|
|
114
|
+
- source: 윗도심이
|
|
115
|
+
expected: "Wittosimi"
|
|
116
|
+
- source: 둔지
|
|
117
|
+
expected: "Tunji"
|
|
118
|
+
- source: 서승
|
|
119
|
+
expected: "Sŏsŭng"
|
|
120
|
+
- source: 신촌
|
|
121
|
+
expected: "Sinch’on"
|
|
122
|
+
- source: 비암덕
|
|
123
|
+
expected: "Piamdŏk"
|
|
124
|
+
- source: 바위안
|
|
125
|
+
expected: "Pawian"
|
|
126
|
+
- source: 오송평
|
|
127
|
+
expected: "Osongp’yŏng"
|
|
128
|
+
- source: 그물목
|
|
129
|
+
expected: "Kŭmulmok"
|
|
130
|
+
- source: 구원정
|
|
131
|
+
expected: "Kuwŏnjŏng"
|
|
132
|
+
- source: 일하
|
|
133
|
+
expected: "Irha"
|
|
134
|
+
- source: 황우
|
|
135
|
+
expected: "Hwangu"
|
|
136
|
+
- source: 자작보
|
|
137
|
+
expected: "Chajakpo"
|
|
138
|
+
- source: 비파1-동
|
|
139
|
+
expected: "Pip’a Il-tong"
|
|
140
|
+
- source: 문암 오-동
|
|
141
|
+
expected: "Munam O-dong"
|
|
142
|
+
|
|
143
|
+
map:
|
|
144
|
+
character_separator: ""
|
|
145
|
+
word_separator: " "
|
|
146
|
+
title_case: True
|
|
147
|
+
inherit: [var-kor-Hang-Latn-mr-1939]
|
|
148
|
+
|
|
149
|
+
rules:
|
|
150
|
+
# Add a zero-width space (U+200B) at the start of the string and after each space (i.e. at the start of every word)
|
|
151
|
+
# So that the word-initial conversion rules will be blocked.
|
|
152
|
+
- pattern: "^"
|
|
153
|
+
result: "\u200B"
|
|
154
|
+
- pattern: "(?<= )"
|
|
155
|
+
result: "\u200B"
|
|
156
|
+
|
|
157
|
+
# convert numbers to space + Hangul
|
|
158
|
+
- pattern: "([^0-9 ])(?=[0-9])"
|
|
159
|
+
result: "\\1 "
|
|
160
|
+
- pattern: "1"
|
|
161
|
+
result: "일"
|
|
162
|
+
- pattern: "2"
|
|
163
|
+
result: "이"
|
|
164
|
+
- pattern: "3"
|
|
165
|
+
result: "삼"
|
|
166
|
+
- pattern: "4"
|
|
167
|
+
result: "사"
|
|
168
|
+
- pattern: "5"
|
|
169
|
+
result: "오"
|
|
170
|
+
- pattern: "6"
|
|
171
|
+
result: "육"
|
|
172
|
+
- pattern: "7"
|
|
173
|
+
result: "칠"
|
|
174
|
+
- pattern: "8"
|
|
175
|
+
result: "팔"
|
|
176
|
+
- pattern: "9"
|
|
177
|
+
result: "구"
|
|
178
|
+
|
|
179
|
+
# Logic to add a hyphen in front of generic terms (currently disabled)
|
|
180
|
+
# - pattern: "(?<=.)(구역|동|리|도|고개|골|로동자구|사무소|초등학교|중학교|고등학교|강|포|령|역|봉|사|천|교|제|저수지|소류지|재|못|말|면|암|교회|촌|병원|바위|공원|섬|우체국|대학교|보건소|굴|치|대교|지구|폭포|해수욕장|휴게소|중고교|읍|보건진료소|마을|톨게이트|대학|시장|경찰서|학교)$" #to be expanded
|
|
181
|
+
# result: "-\\1"
|
|
182
|
+
|
|
183
|
+
postrules:
|
|
184
|
+
|
|
185
|
+
# Add space to the two ends of the string for easier word boundary handling
|
|
186
|
+
- pattern: "^"
|
|
187
|
+
result: " "
|
|
188
|
+
- pattern: "$"
|
|
189
|
+
result: " "
|
|
190
|
+
|
|
191
|
+
# Initial rules in the inherited map were blocked, so that
|
|
192
|
+
# this set of updated rules (with the onset rules removed) will be used instead.
|
|
193
|
+
- pattern: "\u200B"
|
|
194
|
+
result: ""
|
|
195
|
+
|
|
196
|
+
- pattern: "(?<= )ᄀ"
|
|
197
|
+
result: "k" # HANGUL CHOSEONG KIYEOK
|
|
198
|
+
- pattern: "(?<= )ᄂ"
|
|
199
|
+
result: "n" # HANGUL CHOSEONG NIEUN
|
|
200
|
+
- pattern: "(?<= )ᄃ(?=[ᅵᅣᅤᅧᅨᅭᅲ])"
|
|
201
|
+
result: "ch" # HANGUL CHOSEONG TIEUT # T -> Ch before yotized vowels
|
|
202
|
+
- pattern: "(?<= )ᄃ"
|
|
203
|
+
result: "t" # HANGUL CHOSEONG TIEUT
|
|
204
|
+
- pattern: "(?<= )ᄅ"
|
|
205
|
+
result: "r" # HANGUL CHOSEONG RIEUL
|
|
206
|
+
- pattern: "(?<= )ᄆ"
|
|
207
|
+
result: "m" # HANGUL CHOSEONG MIEUM
|
|
208
|
+
- pattern: "(?<= )ᄇ"
|
|
209
|
+
result: "p" # HANGUL CHOSEONG PIEUP
|
|
210
|
+
- pattern: "(?<= )ᄉ(?=ᅱ)"
|
|
211
|
+
result: "sh" # HANGUL CHOSEONG SIOS
|
|
212
|
+
- pattern: "(?<= )ᄉ"
|
|
213
|
+
result: "s" # HANGUL CHOSEONG SIOS
|
|
214
|
+
- pattern: "(?<= )ᄋ"
|
|
215
|
+
result: "" # HANGUL CHOSEONG IEUNG
|
|
216
|
+
- pattern: "(?<= )ᄌ"
|
|
217
|
+
result: "ch" # HANGUL CHOSEONG CIEUC
|
|
218
|
+
- pattern: "(?<= )ᄎ"
|
|
219
|
+
result: "ch’" # HANGUL CHOSEONG CHIEUCH
|
|
220
|
+
- pattern: "(?<= )ᄏ"
|
|
221
|
+
result: "k’" # HANGUL CHOSEONG KHIEUKH
|
|
222
|
+
- pattern: "(?<= )ᄐ(?=[ᅵᅣᅤᅧᅨᅭᅲ])"
|
|
223
|
+
result: "ch’" # HANGUL CHOSEONG THIEUTH + YOTIZED VOWELS
|
|
224
|
+
- pattern: "(?<= )ᄐ"
|
|
225
|
+
result: "t’" # HANGUL CHOSEONG THIEUTH
|
|
226
|
+
- pattern: "(?<= )ᄑ"
|
|
227
|
+
result: "p’" # HANGUL CHOSEONG PHIEUPH
|
|
228
|
+
- pattern: "(?<= )ᄒ"
|
|
229
|
+
result: "h" # HANGUL CHOSEONG HIEUH
|
|
230
|
+
- pattern: "(?<= )ᄁ"
|
|
231
|
+
result: "kk" # HANGUL CHOSEONG SSANGKIYEOK
|
|
232
|
+
- pattern: "(?<= )ᄭ"
|
|
233
|
+
result: "kk" # HANGUL CHOSEONG SIOS-KIYEOK
|
|
234
|
+
- pattern: "(?<= )ᄄ"
|
|
235
|
+
result: "tt" # HANGUL CHOSEONG SSANGTIEUT
|
|
236
|
+
- pattern: "(?<= )ᄯ"
|
|
237
|
+
result: "tt" # HANGUL CHOSEONG SIOS-TIEUT
|
|
238
|
+
- pattern: "(?<= )ᄈ"
|
|
239
|
+
result: "pp" # HANGUL CHOSEONG SSANGPIEUP
|
|
240
|
+
- pattern: "(?<= )ᄲ"
|
|
241
|
+
result: "pp" # HANGUL CHOSEONG SIOS-PIEUP
|
|
242
|
+
- pattern: "(?<= )ᄊ"
|
|
243
|
+
result: "ss" # HANGUL CHOSEONG SSANGSIOS
|
|
244
|
+
- pattern: "(?<= )ᄍ"
|
|
245
|
+
result: "tch" # HANGUL CHOSEONG SSANGCIEUC
|
|
246
|
+
- pattern: "(?<= )ᄶ"
|
|
247
|
+
result: "tch" # HANGUL CHOSEONG SIOS-CIEUC
|
|
248
|
+
|
|
249
|
+
# Remove space added
|
|
250
|
+
- pattern: "^ "
|
|
251
|
+
result: ""
|
|
252
|
+
- pattern: " $"
|
|
253
|
+
result: ""
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: 2011
|
|
4
|
+
language: kor
|
|
5
|
+
source_script: Hang
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
|
|
8
|
+
url:
|
|
9
|
+
creation_date:
|
|
10
|
+
adoption_date:
|
|
11
|
+
description:
|
|
12
|
+
|
|
13
|
+
notes:
|
|
14
|
+
BGN/PCGN 2011 Agreement
|
|
15
|
+
|
|
16
|
+
tests:
|
|
17
|
+
- source: 불국사
|
|
18
|
+
expected: "Bulguksa"
|
|
19
|
+
- source: 묵호
|
|
20
|
+
expected: "Mukho"
|
|
21
|
+
- source: 울산
|
|
22
|
+
expected: "Ulsan"
|
|
23
|
+
- source: 독립문
|
|
24
|
+
expected: "Dongnimmun"
|
|
25
|
+
- source: 강남역
|
|
26
|
+
expected: "Gangnamyeok"
|
|
27
|
+
- source: 남산리
|
|
28
|
+
expected: "Namsan-ri" #Note: no assimilation for -ri even after nasals
|
|
29
|
+
- source: 내월리
|
|
30
|
+
expected: "Naewol-ri"
|
|
31
|
+
- source: 울릉군
|
|
32
|
+
expected: "Ulleung-gun"
|
|
33
|
+
- source: 설악산
|
|
34
|
+
expected: "Seoraksan"
|
|
35
|
+
- source: 삼죽면
|
|
36
|
+
expected: "Samjuk-myeon"
|
|
37
|
+
- source: 평리1동
|
|
38
|
+
expected: "Pyeongni Il-dong"
|
|
39
|
+
- source: 평리2동
|
|
40
|
+
expected: "Pyeongni I-dong"
|
|
41
|
+
- source: 탑안이
|
|
42
|
+
expected: "Tabani"
|
|
43
|
+
|
|
44
|
+
map:
|
|
45
|
+
character_separator: ""
|
|
46
|
+
word_separator: " "
|
|
47
|
+
title_case: True
|
|
48
|
+
inherit: moct-kor-Hang-Latn-2000
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: 2011
|
|
4
|
+
language: kor
|
|
5
|
+
source_script: Kore
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
|
|
8
|
+
url:
|
|
9
|
+
creation_date:
|
|
10
|
+
adoption_date:
|
|
11
|
+
description:
|
|
12
|
+
|
|
13
|
+
notes:
|
|
14
|
+
BGN/PCGN 2011 Agreement
|
|
15
|
+
|
|
16
|
+
tests:
|
|
17
|
+
- source: 佛國寺
|
|
18
|
+
expected: "Bulguksa"
|
|
19
|
+
- source: 묵호
|
|
20
|
+
expected: "Mukho"
|
|
21
|
+
- source: 蔚山
|
|
22
|
+
expected: "Ulsan"
|
|
23
|
+
- source: 獨立門
|
|
24
|
+
expected: "Dongnimmun"
|
|
25
|
+
- source: 江南驛
|
|
26
|
+
expected: "Gangnamyeok"
|
|
27
|
+
- source: 南山里
|
|
28
|
+
expected: "Namsan-ri" #Note: no assimilation for -ri even after nasals
|
|
29
|
+
- source: 내월里
|
|
30
|
+
expected: "Naewol-ri"
|
|
31
|
+
- source: 鬱陵郡
|
|
32
|
+
expected: "Ulleung-gun"
|
|
33
|
+
- source: 雪嶽山
|
|
34
|
+
expected: "Seoraksan"
|
|
35
|
+
- source: 三竹面
|
|
36
|
+
expected: "Samjuk-myeon"
|
|
37
|
+
- source: 坪里1洞
|
|
38
|
+
expected: "Pyeongni Il-dong"
|
|
39
|
+
- source: 坪里2洞
|
|
40
|
+
expected: "Pyeongni I-dong"
|
|
41
|
+
- source: 탑안이
|
|
42
|
+
expected: "Tabani"
|
|
43
|
+
|
|
44
|
+
map:
|
|
45
|
+
character_separator: ""
|
|
46
|
+
word_separator: " "
|
|
47
|
+
title_case: True
|
|
48
|
+
inherit: [var-kor-Kore-Hang-2013, moct-kor-Hang-Latn-2000]
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: 1981
|
|
4
|
+
language: mkd
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Macedonian Romanization, BGN/PCGN 1981 System
|
|
8
|
+
url: https://github.com/riboseinc/interscript/files/4247920/USBGN_romanization_Macedonian_1981.pdf
|
|
9
|
+
creation_date: 1981
|
|
10
|
+
description: BGN/PCGN Romanization table for Macedonian.
|
|
11
|
+
|
|
12
|
+
tests:
|
|
13
|
+
- source: Ѓол
|
|
14
|
+
expected: Đol
|
|
15
|
+
- source: Јусек Тепеси
|
|
16
|
+
expected: Jusek Tepesi
|
|
17
|
+
- source: Љуги Ќарит
|
|
18
|
+
expected: Ljugi Ćarit
|
|
19
|
+
- source: Ќафа Сан
|
|
20
|
+
expected: Ćafa San
|
|
21
|
+
- source: Агроплод Ресен
|
|
22
|
+
expected: Agroplod Resen
|
|
23
|
+
- source: Алта Чука
|
|
24
|
+
expected: Alta Čuka
|
|
25
|
+
- source: Баш Тепе
|
|
26
|
+
expected: Baš Tepe
|
|
27
|
+
- source: Браќам
|
|
28
|
+
expected: Braćam
|
|
29
|
+
- source: Винарска Визба Агропин
|
|
30
|
+
expected: Vinarska Vizba Agropin
|
|
31
|
+
- source: Галичица
|
|
32
|
+
expected: Galičica
|
|
33
|
+
- source: Дрењево
|
|
34
|
+
expected: Drenjevo
|
|
35
|
+
- source: Енешево
|
|
36
|
+
expected: Eneševo
|
|
37
|
+
- source: Иберлија
|
|
38
|
+
expected: Iberlija
|
|
39
|
+
- source: Крмзи Су
|
|
40
|
+
expected: Krmzi Su
|
|
41
|
+
- source: Лесноски Рид
|
|
42
|
+
expected: Lesnoski Rid
|
|
43
|
+
- source: Мала Корабска Врата
|
|
44
|
+
expected: Mala Korabska Vrata
|
|
45
|
+
- source: Низок Врв
|
|
46
|
+
expected: Nizok Vrv
|
|
47
|
+
- source: Охридско Езеро
|
|
48
|
+
expected: Ohridsko Ezero
|
|
49
|
+
- source: Прлиќ
|
|
50
|
+
expected: Prlić
|
|
51
|
+
- source: Равна Гора
|
|
52
|
+
expected: Ravna Gora
|
|
53
|
+
- source: Сеѓавечкиот Рид
|
|
54
|
+
expected: Seđavečkiot Rid
|
|
55
|
+
- source: Трновите Њиве
|
|
56
|
+
expected: Trnovite Njive
|
|
57
|
+
- source: Фасов Рид
|
|
58
|
+
expected: Fasov Rid
|
|
59
|
+
- source: Црни Камен
|
|
60
|
+
expected: Crni Kamen
|
|
61
|
+
- source: Чатал Чешми
|
|
62
|
+
expected: Čatal Češmi
|
|
63
|
+
- source: Шехово
|
|
64
|
+
expected: Šehovo
|
|
65
|
+
|
|
66
|
+
notes:
|
|
67
|
+
- The character ѓ should be romanized g when it occurs before е and и. In other
|
|
68
|
+
instances, it should be romanized đ (Đ).
|
|
69
|
+
- The character ќ should be romanized k when it occurs before е and и. In other
|
|
70
|
+
instances, it should be romanized ć.
|
|
71
|
+
|
|
72
|
+
map:
|
|
73
|
+
rules:
|
|
74
|
+
- pattern: "Ѓ(?=[еЕиИ])"
|
|
75
|
+
result: "G"
|
|
76
|
+
- pattern: "ѓ(?=[еЕиИ])"
|
|
77
|
+
result: "g"
|
|
78
|
+
- pattern: "Ќ(?=[еЕиИ])"
|
|
79
|
+
result: "K"
|
|
80
|
+
- pattern: "ќ(?=[еЕиИ])"
|
|
81
|
+
result: "k"
|
|
82
|
+
|
|
83
|
+
postrules:
|
|
84
|
+
# DZ
|
|
85
|
+
- pattern: "((?<=[[:upper:]])Dz(?=[[:upper:]])?|(?<=[[:upper:]])?Dz(?=[[:upper:]]))"
|
|
86
|
+
result: "DZ"
|
|
87
|
+
#LJ
|
|
88
|
+
- pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
|
|
89
|
+
result: "LJ"
|
|
90
|
+
#NJ
|
|
91
|
+
- pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
|
|
92
|
+
result: "NJ"
|
|
93
|
+
#DŽ
|
|
94
|
+
- pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
|
|
95
|
+
result: "DŽ"
|
|
96
|
+
|
|
97
|
+
characters:
|
|
98
|
+
"\u0410": "A"
|
|
99
|
+
"\u0411": "B"
|
|
100
|
+
"\u0412": "V"
|
|
101
|
+
"\u0413": "G"
|
|
102
|
+
"\u0414": "D"
|
|
103
|
+
"\u0403": "\u0110" # Đ
|
|
104
|
+
"\u0415": "E"
|
|
105
|
+
"\u0416": "\u005a\u030c" # Ž
|
|
106
|
+
"\u0417": "Z"
|
|
107
|
+
"\u0405": "Dz"
|
|
108
|
+
"\u0418": "I"
|
|
109
|
+
"\u0408": "J"
|
|
110
|
+
"\u041A": "K"
|
|
111
|
+
"\u041B": "L"
|
|
112
|
+
"\u0409": "Lj"
|
|
113
|
+
"\u041C": "M"
|
|
114
|
+
"\u041D": "N"
|
|
115
|
+
"\u040A": "Nj"
|
|
116
|
+
"\u041E": "O"
|
|
117
|
+
"\u041F": "P"
|
|
118
|
+
"\u0420": "R"
|
|
119
|
+
"\u0421": "S"
|
|
120
|
+
"\u0422": "T"
|
|
121
|
+
"\u040c": "\u0106" # Ć
|
|
122
|
+
"\u0423": "U"
|
|
123
|
+
"\u0424": "F"
|
|
124
|
+
"\u0425": "H"
|
|
125
|
+
"\u0426": "C"
|
|
126
|
+
"\u0427": "\u0043\u030c" # Č
|
|
127
|
+
"\u040F": "D\u007a\u030c" # Dž
|
|
128
|
+
"\u0428": "\u0053\u030c" # Š
|
|
129
|
+
"\u0430": "a"
|
|
130
|
+
"\u0431": "b"
|
|
131
|
+
"\u0432": "v"
|
|
132
|
+
"\u0433": "g"
|
|
133
|
+
"\u0434": "d"
|
|
134
|
+
"\u0453": "\u0111" # đ
|
|
135
|
+
"\u0435": "e"
|
|
136
|
+
"\u0436": "\u007a\u030c" # ž
|
|
137
|
+
"\u0437": "z"
|
|
138
|
+
"\u0455": "dz"
|
|
139
|
+
"\u0438": "i"
|
|
140
|
+
"\u0458": "j"
|
|
141
|
+
"\u043A": "k"
|
|
142
|
+
"\u043B": "l"
|
|
143
|
+
"\u0459": "lj"
|
|
144
|
+
"\u043C": "m"
|
|
145
|
+
"\u043D": "n"
|
|
146
|
+
"\u045A": "nj"
|
|
147
|
+
"\u043E": "o"
|
|
148
|
+
"\u043F": "p"
|
|
149
|
+
"\u0440": "r"
|
|
150
|
+
"\u0441": "s"
|
|
151
|
+
"\u0442": "t"
|
|
152
|
+
"\u045c": "\u0107" # ć
|
|
153
|
+
"\u0443": "u"
|
|
154
|
+
"\u0444": "f"
|
|
155
|
+
"\u0445": "h"
|
|
156
|
+
"\u0446": "c"
|
|
157
|
+
"\u0447": "\u0063\u030c" # č
|
|
158
|
+
"\u045F": "d\u007a\u030c" # dž
|
|
159
|
+
"\u0448": "\u0073\u030c" # š
|