interscript 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +246 -14
  3. data/bin/interscript +38 -17
  4. data/bin/setup +8 -0
  5. data/lib/g2pwrapper.py +34 -0
  6. data/lib/interscript.rb +140 -16
  7. data/lib/interscript/command.rb +27 -0
  8. data/lib/interscript/mapping.rb +125 -0
  9. data/lib/interscript/version.rb +1 -1
  10. data/lib/model-7 +0 -0
  11. data/lib/tha-pt-b-7 +0 -0
  12. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  13. data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
  14. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  15. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  16. data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
  17. data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
  18. data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
  19. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  20. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  21. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  22. data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
  23. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  24. data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
  25. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  26. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  27. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  28. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
  29. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
  30. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
  31. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  32. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  33. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  34. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  35. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
  36. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  37. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
  38. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  39. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
  40. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
  41. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
  42. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
  43. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  44. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  45. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
  46. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  47. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  48. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  49. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  50. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  51. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
  52. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  53. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
  54. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  55. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  56. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  57. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
  58. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
  59. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
  60. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
  61. data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
  62. data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
  63. data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
  64. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
  65. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  66. data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
  67. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
  68. data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
  69. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
  70. data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
  71. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
  72. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
  73. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
  74. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
  75. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
  76. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  77. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
  78. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  79. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  80. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  81. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  82. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  83. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  84. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  85. data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
  86. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  87. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  88. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  89. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  90. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  91. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
  92. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  93. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  94. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  95. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  96. data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
  97. data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
  98. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  99. data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
  100. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  101. data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
  102. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  103. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  104. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  105. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  106. data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
  107. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  108. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  109. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  110. data/spec/interscript/mapping_spec.rb +42 -0
  111. data/spec/interscript_spec.rb +20 -5
  112. data/spec/spec_helper.rb +3 -1
  113. metadata +149 -24
  114. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  115. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  116. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  117. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  118. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  119. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  120. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
@@ -0,0 +1,253 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: kn-1945
4
+ language: kor
5
+ source_script: Hang
6
+ destination_script: Latn
7
+ name: BGN/PCGN 1945 Agreement
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693725/ROMANIZATION_OF_KOREAN-_MR_for_DPRK.pdf
9
+ creation_date: 1945
10
+ adoption_date:
11
+ description:
12
+
13
+ notes: "
14
+
15
+ 1. At the end of a syllable, the character ᄋ should be romanized ng,
16
+ as in the following example:
17
+
18
+ 평양 → P’yŏngyang
19
+
20
+ At the beginning of a syllable, the character ᄋ is silent and
21
+ should not be romanized. An example follows:
22
+
23
+ 용화 → Yonghwa
24
+
25
+ 2. Syllable boundaries within words are not reflected in romanization.
26
+ In the different types of syllables shown in the table below, C
27
+ represents any consonant character, V represents any vowel character
28
+ and / represents a syllable boundary.
29
+
30
+ Han’gŭl 개성 남포 안양
31
+ Syllable boundaries CV/CVC CVC/CV VC/VC
32
+ Romanization Kaesŏng Namp’o Anyang
33
+
34
+ 3. Euphonic changes occurring within a word, including between the
35
+ specific and generic of a geographical name, should be reflected in
36
+ romanization. Generic terms are usually seen separated from the name
37
+ by a hyphen and with a lower case initial letter rather than as a
38
+ separate word:
39
+
40
+ 영진리 → Yŏngjil-li
41
+ 덕흥리 → Tŏkhŭng-ni
42
+ 압록강 → Amnok-kang
43
+ 대동강 → Taedong-gang
44
+
45
+ 4. As a result of 조선말규범집 (‘Standard Korean Language’ guidelines
46
+ published in North Korea in 1966), unlike the Korean spoken in the
47
+ Republic of Korea, the language spoken in the Democratic People’s
48
+ Republic of Korea maintains and pronounces the word-initial ᄅ (‘r’).
49
+ The use of the word-initial ᄅ ('r') can be seen in official news
50
+ reports as well as native mapping. Since such examples exist, the
51
+ word-initial ᄅ ('r') is reflected as an option in the tables given above.
52
+
53
+ 5. The Romanization column shows only lowercase forms but, when romanizing,
54
+ uppercase and lowercase Roman letters as appropriate should be used.
55
+ "
56
+
57
+ tests:
58
+ - source: "평양"
59
+ expected: "P’yŏngyang"
60
+ - source: "용화"
61
+ expected: "Yonghwa"
62
+ - source: "개성"
63
+ expected: "Kaesŏng"
64
+ - source: "남포"
65
+ expected: "Namp’o"
66
+ - source: "안양"
67
+ expected: "Anyang"
68
+ - source: "영진-리"
69
+ expected: "Yŏngjil-li"
70
+ - source: "덕흥-리"
71
+ expected: "Tŏkhŭng-ni"
72
+ - source: "압록-강"
73
+ expected: "Amnok-kang"
74
+ - source: "대동-강"
75
+ expected: "Taedong-gang"
76
+ - source: "라선특별시"
77
+ expected: "Rasŏnt’ŭkpyŏlsi"
78
+ - source: 은하-리
79
+ expected: "Ŭnha-ri"
80
+ - source: 은중-리
81
+ expected: "Ŭnjung-ni"
82
+ - source: 은장-령
83
+ expected: "Ŭnjang-nyŏng"
84
+ - source: 은혜-동
85
+ expected: "Ŭnhye-dong"
86
+ - source: 은호-리
87
+ expected: "Ŭnho-ri"
88
+ - source: 은행정
89
+ expected: "Ŭnhaengjŏng"
90
+ - source: 은행-동
91
+ expected: "Ŭnhaeng-dong"
92
+ - source: 은행-촌
93
+ expected: "Ŭnhaeng-ch’on"
94
+ - source: 원수
95
+ expected: "Wŏnsu"
96
+ - source: 원소리-고개
97
+ expected: "Wŏnsori-gogae"
98
+ - source: 원소참
99
+ expected: "Wŏnsoch’am"
100
+ - source: 원소-리
101
+ expected: "Wŏnso-ri"
102
+ - source: 원신-리
103
+ expected: "Wŏnsil-li"
104
+ - source: 난곡
105
+ expected: "Nan’gok"
106
+ - source: 난산-리
107
+ expected: "Nansal-li"
108
+ - source: 난직
109
+ expected: "Nanjik"
110
+ - source: 영곡
111
+ expected: "Yŏnggok"
112
+ - source: 윗두밀
113
+ expected: "Wittumil"
114
+ - source: 윗도심이
115
+ expected: "Wittosimi"
116
+ - source: 둔지
117
+ expected: "Tunji"
118
+ - source: 서승
119
+ expected: "Sŏsŭng"
120
+ - source: 신촌
121
+ expected: "Sinch’on"
122
+ - source: 비암덕
123
+ expected: "Piamdŏk"
124
+ - source: 바위안
125
+ expected: "Pawian"
126
+ - source: 오송평
127
+ expected: "Osongp’yŏng"
128
+ - source: 그물목
129
+ expected: "Kŭmulmok"
130
+ - source: 구원정
131
+ expected: "Kuwŏnjŏng"
132
+ - source: 일하
133
+ expected: "Irha"
134
+ - source: 황우
135
+ expected: "Hwangu"
136
+ - source: 자작보
137
+ expected: "Chajakpo"
138
+ - source: 비파1-동
139
+ expected: "Pip’a Il-tong"
140
+ - source: 문암 오-동
141
+ expected: "Munam O-dong"
142
+
143
+ map:
144
+ character_separator: ""
145
+ word_separator: " "
146
+ title_case: True
147
+ inherit: [var-kor-Hang-Latn-mr-1939]
148
+
149
+ rules:
150
+ # Add a zero-width space (U+200B) at the start of the string and after each space
151
+ # (i.e. at the start of every word) so that the word-initial conversion rules are blocked.
152
+ - pattern: "^"
153
+ result: "\u200B"
154
+ - pattern: "(?<= )"
155
+ result: "\u200B"
156
+
157
+ # convert numbers to space + Hangul
158
+ - pattern: "([^0-9 ])(?=[0-9])"
159
+ result: "\\1 "
160
+ - pattern: "1"
161
+ result: "일"
162
+ - pattern: "2"
163
+ result: "이"
164
+ - pattern: "3"
165
+ result: "삼"
166
+ - pattern: "4"
167
+ result: "사"
168
+ - pattern: "5"
169
+ result: "오"
170
+ - pattern: "6"
171
+ result: "육"
172
+ - pattern: "7"
173
+ result: "칠"
174
+ - pattern: "8"
175
+ result: "팔"
176
+ - pattern: "9"
177
+ result: "구"
178
+
179
+ # Logic to add a hyphen in front of generic terms (currently disabled)
180
+ # - pattern: "(?<=.)(구역|동|리|도|고개|골|로동자구|사무소|초등학교|중학교|고등학교|강|포|령|역|봉|사|천|교|제|저수지|소류지|재|못|말|면|암|교회|촌|병원|바위|공원|섬|우체국|대학교|보건소|굴|치|대교|지구|폭포|해수욕장|휴게소|중고교|읍|보건진료소|마을|톨게이트|대학|시장|경찰서|학교)$" #to be expanded
181
+ # result: "-\\1"
182
+
183
+ postrules:
184
+
185
+ # Add space to the two ends of the string for easier word boundary handling
186
+ - pattern: "^"
187
+ result: " "
188
+ - pattern: "$"
189
+ result: " "
190
+
191
+ # The word-initial rules in the inherited map were blocked, so
192
+ # this updated set of rules (with the onset rules removed) is used instead.
193
+ - pattern: "\u200B"
194
+ result: ""
195
+
196
+ - pattern: "(?<= )ᄀ"
197
+ result: "k" # HANGUL CHOSEONG KIYEOK
198
+ - pattern: "(?<= )ᄂ"
199
+ result: "n" # HANGUL CHOSEONG NIEUN
200
+ - pattern: "(?<= )ᄃ(?=[ᅵᅣᅤᅧᅨᅭᅲ])"
201
+ result: "ch" # HANGUL CHOSEONG TIEUT # T -> Ch before yotized vowels
202
+ - pattern: "(?<= )ᄃ"
203
+ result: "t" # HANGUL CHOSEONG TIEUT
204
+ - pattern: "(?<= )ᄅ"
205
+ result: "r" # HANGUL CHOSEONG RIEUL
206
+ - pattern: "(?<= )ᄆ"
207
+ result: "m" # HANGUL CHOSEONG MIEUM
208
+ - pattern: "(?<= )ᄇ"
209
+ result: "p" # HANGUL CHOSEONG PIEUP
210
+ - pattern: "(?<= )ᄉ(?=ᅱ)"
211
+ result: "sh" # HANGUL CHOSEONG SIOS
212
+ - pattern: "(?<= )ᄉ"
213
+ result: "s" # HANGUL CHOSEONG SIOS
214
+ - pattern: "(?<= )ᄋ"
215
+ result: "" # HANGUL CHOSEONG IEUNG
216
+ - pattern: "(?<= )ᄌ"
217
+ result: "ch" # HANGUL CHOSEONG CIEUC
218
+ - pattern: "(?<= )ᄎ"
219
+ result: "ch’" # HANGUL CHOSEONG CHIEUCH
220
+ - pattern: "(?<= )ᄏ"
221
+ result: "k’" # HANGUL CHOSEONG KHIEUKH
222
+ - pattern: "(?<= )ᄐ(?=[ᅵᅣᅤᅧᅨᅭᅲ])"
223
+ result: "ch’" # HANGUL CHOSEONG THIEUTH + YOTIZED VOWELS
224
+ - pattern: "(?<= )ᄐ"
225
+ result: "t’" # HANGUL CHOSEONG THIEUTH
226
+ - pattern: "(?<= )ᄑ"
227
+ result: "p’" # HANGUL CHOSEONG PHIEUPH
228
+ - pattern: "(?<= )ᄒ"
229
+ result: "h" # HANGUL CHOSEONG HIEUH
230
+ - pattern: "(?<= )ᄁ"
231
+ result: "kk" # HANGUL CHOSEONG SSANGKIYEOK
232
+ - pattern: "(?<= )ᄭ"
233
+ result: "kk" # HANGUL CHOSEONG SIOS-KIYEOK
234
+ - pattern: "(?<= )ᄄ"
235
+ result: "tt" # HANGUL CHOSEONG SSANGTIEUT
236
+ - pattern: "(?<= )ᄯ"
237
+ result: "tt" # HANGUL CHOSEONG SIOS-TIEUT
238
+ - pattern: "(?<= )ᄈ"
239
+ result: "pp" # HANGUL CHOSEONG SSANGPIEUP
240
+ - pattern: "(?<= )ᄲ"
241
+ result: "pp" # HANGUL CHOSEONG SIOS-PIEUP
242
+ - pattern: "(?<= )ᄊ"
243
+ result: "ss" # HANGUL CHOSEONG SSANGSIOS
244
+ - pattern: "(?<= )ᄍ"
245
+ result: "tch" # HANGUL CHOSEONG SSANGCIEUC
246
+ - pattern: "(?<= )ᄶ"
247
+ result: "tch" # HANGUL CHOSEONG SIOS-CIEUC
248
+
249
+ # Remove space added
250
+ - pattern: "^ "
251
+ result: ""
252
+ - pattern: " $"
253
+ result: ""
@@ -0,0 +1,48 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2011
4
+ language: kor
5
+ source_script: Hang
6
+ destination_script: Latn
7
+ name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
8
+ url:
9
+ creation_date:
10
+ adoption_date:
11
+ description:
12
+
13
+ notes:
14
+ BGN/PCGN 2011 Agreement
15
+
16
+ tests:
17
+ - source: 불국사
18
+ expected: "Bulguksa"
19
+ - source: 묵호
20
+ expected: "Mukho"
21
+ - source: 울산
22
+ expected: "Ulsan"
23
+ - source: 독립문
24
+ expected: "Dongnimmun"
25
+ - source: 강남역
26
+ expected: "Gangnamyeok"
27
+ - source: 남산리
28
+ expected: "Namsan-ri" #Note: no assimilation for -ri even after nasals
29
+ - source: 내월리
30
+ expected: "Naewol-ri"
31
+ - source: 울릉군
32
+ expected: "Ulleung-gun"
33
+ - source: 설악산
34
+ expected: "Seoraksan"
35
+ - source: 삼죽면
36
+ expected: "Samjuk-myeon"
37
+ - source: 평리1동
38
+ expected: "Pyeongni Il-dong"
39
+ - source: 평리2동
40
+ expected: "Pyeongni I-dong"
41
+ - source: 탑안이
42
+ expected: "Tabani"
43
+
44
+ map:
45
+ character_separator: ""
46
+ word_separator: " "
47
+ title_case: True
48
+ inherit: moct-kor-Hang-Latn-2000
@@ -0,0 +1,48 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2011
4
+ language: kor
5
+ source_script: Kore
6
+ destination_script: Latn
7
+ name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
8
+ url:
9
+ creation_date:
10
+ adoption_date:
11
+ description:
12
+
13
+ notes:
14
+ BGN/PCGN 2011 Agreement
15
+
16
+ tests:
17
+ - source: 佛國寺
18
+ expected: "Bulguksa"
19
+ - source: 묵호
20
+ expected: "Mukho"
21
+ - source: 蔚山
22
+ expected: "Ulsan"
23
+ - source: 獨立門
24
+ expected: "Dongnimmun"
25
+ - source: 江南驛
26
+ expected: "Gangnamyeok"
27
+ - source: 南山里
28
+ expected: "Namsan-ri" #Note: no assimilation for -ri even after nasals
29
+ - source: 내월里
30
+ expected: "Naewol-ri"
31
+ - source: 鬱陵郡
32
+ expected: "Ulleung-gun"
33
+ - source: 雪嶽山
34
+ expected: "Seoraksan"
35
+ - source: 三竹面
36
+ expected: "Samjuk-myeon"
37
+ - source: 坪里1洞
38
+ expected: "Pyeongni Il-dong"
39
+ - source: 坪里2洞
40
+ expected: "Pyeongni I-dong"
41
+ - source: 탑안이
42
+ expected: "Tabani"
43
+
44
+ map:
45
+ character_separator: ""
46
+ word_separator: " "
47
+ title_case: True
48
+ inherit: [var-kor-Kore-Hang-2013, moct-kor-Hang-Latn-2000]
@@ -0,0 +1,159 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1981
4
+ language: mkd
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Macedonian Romanization, BGN/PCGN 1981 System
8
+ url: https://github.com/riboseinc/interscript/files/4247920/USBGN_romanization_Macedonian_1981.pdf
9
+ creation_date: 1981
10
+ description: BGN/PCGN Romanization table for Macedonian.
11
+
12
+ tests:
13
+ - source: Ѓол
14
+ expected: Đol
15
+ - source: Јусек Тепеси
16
+ expected: Jusek Tepesi
17
+ - source: Љуги Ќарит
18
+ expected: Ljugi Ćarit
19
+ - source: Ќафа Сан
20
+ expected: Ćafa San
21
+ - source: Агроплод Ресен
22
+ expected: Agroplod Resen
23
+ - source: Алта Чука
24
+ expected: Alta Čuka
25
+ - source: Баш Тепе
26
+ expected: Baš Tepe
27
+ - source: Браќам
28
+ expected: Braćam
29
+ - source: Винарска Визба Агропин
30
+ expected: Vinarska Vizba Agropin
31
+ - source: Галичица
32
+ expected: Galičica
33
+ - source: Дрењево
34
+ expected: Drenjevo
35
+ - source: Енешево
36
+ expected: Eneševo
37
+ - source: Иберлија
38
+ expected: Iberlija
39
+ - source: Крмзи Су
40
+ expected: Krmzi Su
41
+ - source: Лесноски Рид
42
+ expected: Lesnoski Rid
43
+ - source: Мала Корабска Врата
44
+ expected: Mala Korabska Vrata
45
+ - source: Низок Врв
46
+ expected: Nizok Vrv
47
+ - source: Охридско Езеро
48
+ expected: Ohridsko Ezero
49
+ - source: Прлиќ
50
+ expected: Prlić
51
+ - source: Равна Гора
52
+ expected: Ravna Gora
53
+ - source: Сеѓавечкиот Рид
54
+ expected: Seđavečkiot Rid
55
+ - source: Трновите Њиве
56
+ expected: Trnovite Njive
57
+ - source: Фасов Рид
58
+ expected: Fasov Rid
59
+ - source: Црни Камен
60
+ expected: Crni Kamen
61
+ - source: Чатал Чешми
62
+ expected: Čatal Češmi
63
+ - source: Шехово
64
+ expected: Šehovo
65
+
66
+ notes:
67
+ - The character ѓ should be romanized g when it occurs before е and и. In other
68
+ instances, it should be romanized đ (Đ).
69
+ - The character ќ should be romanized k when it occurs before е and и. In other
70
+ instances, it should be romanized ć.
71
+
72
+ map:
73
+ rules:
74
+ - pattern: "Ѓ(?=[еЕиИ])"
75
+ result: "G"
76
+ - pattern: "ѓ(?=[еЕиИ])"
77
+ result: "g"
78
+ - pattern: "Ќ(?=[еЕиИ])"
79
+ result: "K"
80
+ - pattern: "ќ(?=[еЕиИ])"
81
+ result: "k"
82
+
83
+ postrules:
84
+ # DZ
85
+ - pattern: "((?<=[[:upper:]])Dz(?=[[:upper:]])?|(?<=[[:upper:]])?Dz(?=[[:upper:]]))"
86
+ result: "DZ"
87
+ #LJ
88
+ - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
89
+ result: "LJ"
90
+ #NJ
91
+ - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
92
+ result: "NJ"
93
+ #DŽ
94
+ - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
95
+ result: "DŽ"
96
+
97
+ characters:
98
+ "\u0410": "A"
99
+ "\u0411": "B"
100
+ "\u0412": "V"
101
+ "\u0413": "G"
102
+ "\u0414": "D"
103
+ "\u0403": "\u0110" # Đ
104
+ "\u0415": "E"
105
+ "\u0416": "\u005a\u030c" # Ž
106
+ "\u0417": "Z"
107
+ "\u0405": "Dz"
108
+ "\u0418": "I"
109
+ "\u0408": "J"
110
+ "\u041A": "K"
111
+ "\u041B": "L"
112
+ "\u0409": "Lj"
113
+ "\u041C": "M"
114
+ "\u041D": "N"
115
+ "\u040A": "Nj"
116
+ "\u041E": "O"
117
+ "\u041F": "P"
118
+ "\u0420": "R"
119
+ "\u0421": "S"
120
+ "\u0422": "T"
121
+ "\u040c": "\u0106" # Ć
122
+ "\u0423": "U"
123
+ "\u0424": "F"
124
+ "\u0425": "H"
125
+ "\u0426": "C"
126
+ "\u0427": "\u0043\u030c" # Č
127
+ "\u040F": "D\u007a\u030c" # Dž
128
+ "\u0428": "\u0053\u030c" # Š
129
+ "\u0430": "a"
130
+ "\u0431": "b"
131
+ "\u0432": "v"
132
+ "\u0433": "g"
133
+ "\u0434": "d"
134
+ "\u0453": "\u0111" # đ
135
+ "\u0435": "e"
136
+ "\u0436": "\u007a\u030c" # ž
137
+ "\u0437": "z"
138
+ "\u0455": "dz"
139
+ "\u0438": "i"
140
+ "\u0458": "j"
141
+ "\u043A": "k"
142
+ "\u043B": "l"
143
+ "\u0459": "lj"
144
+ "\u043C": "m"
145
+ "\u043D": "n"
146
+ "\u045A": "nj"
147
+ "\u043E": "o"
148
+ "\u043F": "p"
149
+ "\u0440": "r"
150
+ "\u0441": "s"
151
+ "\u0442": "t"
152
+ "\u045c": "\u0107" # ć
153
+ "\u0443": "u"
154
+ "\u0444": "f"
155
+ "\u0445": "h"
156
+ "\u0446": "c"
157
+ "\u0447": "\u0063\u030c" # č
158
+ "\u045F": "d\u007a\u030c" # dž
159
+ "\u0448": "\u0073\u030c" # š