interscript 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +246 -14
- data/bin/interscript +38 -17
- data/bin/setup +8 -0
- data/lib/g2pwrapper.py +34 -0
- data/lib/interscript.rb +140 -16
- data/lib/interscript/command.rb +27 -0
- data/lib/interscript/mapping.rb +125 -0
- data/lib/interscript/version.rb +1 -1
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
- data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
- data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
- data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
- data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
- data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
- data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
- data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
- data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
- data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
- data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
- data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
- data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
- data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
- data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
- data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
- data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
- data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
- data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
- data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
- data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
- data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
- data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
- data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
- data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
- data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
- data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
- data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
- data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
- data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
- data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
- data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
- data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
- data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
- data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
- data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
- data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
- data/spec/interscript/mapping_spec.rb +42 -0
- data/spec/interscript_spec.rb +20 -5
- data/spec/spec_helper.rb +3 -1
- metadata +149 -24
- data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
- data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
- data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
- data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
- data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
- data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
- data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: royin
|
|
3
|
+
id: 1999-chained
|
|
4
|
+
language: tha
|
|
5
|
+
source_script: Thai
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Royal Thai General System of Transcription (1999)
|
|
8
|
+
url: http://www.royin.go.th/wp-content/uploads/royin-ebook/276/FileUpload/758_6484.pdf
|
|
9
|
+
creation_date: 1999
|
|
10
|
+
adoption_date:
|
|
11
|
+
description: |
|
|
12
|
+
This map loads two external maps to convert Thai text first into phonemic Thai,
|
|
13
|
+
and then into IPA transcription.
|
|
14
|
+
|
|
15
|
+
The IPA transcription will then be handled by this map, and converted into
|
|
16
|
+
Royal Thai General System of Transcription (1999)
|
|
17
|
+
|
|
18
|
+
The first two parts are done via two external maps.
|
|
19
|
+
|
|
20
|
+
notes: |
|
|
21
|
+
The conversion from Thai to Phonemic Thai is still work-in-progress.
|
|
22
|
+
|
|
23
|
+
tests:
|
|
24
|
+
- source: "สะพาน"
|
|
25
|
+
expected: "saphan"
|
|
26
|
+
- source: "ลานตา"
|
|
27
|
+
expected: "lanta"
|
|
28
|
+
- source: "บาง"
|
|
29
|
+
expected: "bang"
|
|
30
|
+
- source: "สมุทร"
|
|
31
|
+
expected: "samut"
|
|
32
|
+
- source: "ลำ"
|
|
33
|
+
expected: "lam"
|
|
34
|
+
- source: "สิงห์"
|
|
35
|
+
expected: "sing"
|
|
36
|
+
- source: "บุรี"
|
|
37
|
+
expected: "buri"
|
|
38
|
+
- source: "สตึก"
|
|
39
|
+
expected: "satuek"
|
|
40
|
+
- source: "พืช"
|
|
41
|
+
expected: "phuet"
|
|
42
|
+
- source: "บรบือ"
|
|
43
|
+
expected: "borabue"
|
|
44
|
+
- source: "ภู"
|
|
45
|
+
expected: "phu"
|
|
46
|
+
- source: "ปะนาเระ"
|
|
47
|
+
expected: "panare"
|
|
48
|
+
- source: "เพ็ญ"
|
|
49
|
+
expected: "phen"
|
|
50
|
+
# - source: "เขน"
|
|
51
|
+
# expected: "khen"
|
|
52
|
+
- source: "แซะ"
|
|
53
|
+
expected: "sae"
|
|
54
|
+
# - source: "สะแก"
|
|
55
|
+
# expected: "sakae"
|
|
56
|
+
- source: "พะโต๊ะ"
|
|
57
|
+
expected: "phato"
|
|
58
|
+
- source: "ลพ"
|
|
59
|
+
expected: "lop"
|
|
60
|
+
# - source: "สามโก้"
|
|
61
|
+
# expected: "samko"
|
|
62
|
+
- source: "เกาะ"
|
|
63
|
+
expected: "ko"
|
|
64
|
+
- source: "บ่อ"
|
|
65
|
+
expected: "bo"
|
|
66
|
+
- source: "เซอะ"
|
|
67
|
+
expected: "soe"
|
|
68
|
+
- source: "อำเภอ"
|
|
69
|
+
expected: "amphoe"
|
|
70
|
+
- source: "เนิน"
|
|
71
|
+
expected: "noen"
|
|
72
|
+
# - source: "เพียะ"
|
|
73
|
+
# expected: "phia"
|
|
74
|
+
- source: "เทียน"
|
|
75
|
+
expected: "thian"
|
|
76
|
+
# - source: "เกือะ"
|
|
77
|
+
# expected: "kuea"
|
|
78
|
+
- source: "เมือง"
|
|
79
|
+
expected: "mueang"
|
|
80
|
+
# - source: "ผัวะ"
|
|
81
|
+
# expected: "phua"
|
|
82
|
+
- source: "บัว"
|
|
83
|
+
expected: "bua"
|
|
84
|
+
# - source: "ควน"
|
|
85
|
+
# expected: "khuan"
|
|
86
|
+
- source: "ใหญ่"
|
|
87
|
+
expected: "yai"
|
|
88
|
+
# - source: "ไผ่"
|
|
89
|
+
# expected: "phai"
|
|
90
|
+
- source: "ชัย"
|
|
91
|
+
expected: "chai"
|
|
92
|
+
- source: "ไทย"
|
|
93
|
+
expected: "thai"
|
|
94
|
+
# - source: "ปาย"
|
|
95
|
+
# expected: "pai"
|
|
96
|
+
- source: "เจ้า"
|
|
97
|
+
expected: "chao"
|
|
98
|
+
- source: "ข้าว"
|
|
99
|
+
expected: "khao"
|
|
100
|
+
# - source: "กุย"
|
|
101
|
+
# expected: "kui"
|
|
102
|
+
- source: "โดย"
|
|
103
|
+
expected: "doi"
|
|
104
|
+
# - source: "ดอย"
|
|
105
|
+
# expected: "doi"
|
|
106
|
+
# - source: "งิ้ว"
|
|
107
|
+
# expected: "ngio"
|
|
108
|
+
- source: "เร็ว"
|
|
109
|
+
expected: "reo"
|
|
110
|
+
# - source: "เลว"
|
|
111
|
+
# expected: "leo"
|
|
112
|
+
# - source: "เลย"
|
|
113
|
+
# expected: "loei"
|
|
114
|
+
# - source: "เดือย"
|
|
115
|
+
# expected: "dueai"
|
|
116
|
+
# - source: "ห้วย"
|
|
117
|
+
# expected: "huai"
|
|
118
|
+
- source: "แมว"
|
|
119
|
+
expected: "maeo"
|
|
120
|
+
- source: "เขียว"
|
|
121
|
+
expected: "khiao"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
chain: ["var-tha-Thai-Thai-phonemic" ,"var-tha-Thai-Zsym-ipa"]
|
|
125
|
+
|
|
126
|
+
map:
|
|
127
|
+
title-case: false
|
|
128
|
+
word_separator: " "
|
|
129
|
+
|
|
130
|
+
rules:
|
|
131
|
+
- pattern: '[˩˨˧˦˥]'
|
|
132
|
+
result : ''
|
|
133
|
+
- pattern: '^'
|
|
134
|
+
result: '.'
|
|
135
|
+
|
|
136
|
+
postrules:
|
|
137
|
+
- pattern: '\.'
|
|
138
|
+
result: ''
|
|
139
|
+
|
|
140
|
+
characters:
|
|
141
|
+
|
|
142
|
+
dictionary:
|
|
143
|
+
|
|
144
|
+
'̯': ''
|
|
145
|
+
'̚': ''
|
|
146
|
+
|
|
147
|
+
'ʔ': ''
|
|
148
|
+
'ː': ''
|
|
149
|
+
|
|
150
|
+
't͡ɕʰ': 'ch'
|
|
151
|
+
't͡ɕ': 'ch'
|
|
152
|
+
'ŋ': 'ng'
|
|
153
|
+
'j': 'y'
|
|
154
|
+
'ɔ': 'o'
|
|
155
|
+
'ɤ': 'oe'
|
|
156
|
+
'ɛ': 'ae'
|
|
157
|
+
'ɯ': 'ue' # New spelling, was u in 1968
|
|
158
|
+
'ʰ': 'h'
|
|
159
|
+
|
|
160
|
+
'aːw': 'ao'
|
|
161
|
+
'aw': 'ao'
|
|
162
|
+
'a̯w': 'ao' # New spelling, was eu in 1968
|
|
163
|
+
'eːw': 'eo'
|
|
164
|
+
'ew': 'eo'
|
|
165
|
+
'ɛːw': 'aeo'
|
|
166
|
+
'ɛw': 'aeo'
|
|
167
|
+
'iːw': 'io' # New spelling, was iu in 1968
|
|
168
|
+
'iw': 'io' # New spelling, was iu in 1968
|
|
169
|
+
|
|
170
|
+
'aːj': 'ai'
|
|
171
|
+
'aj': 'ai'
|
|
172
|
+
'a̯j': 'ai'
|
|
173
|
+
'ɔːj': 'oi'
|
|
174
|
+
'ɔj': 'oi'
|
|
175
|
+
'oːj': 'oi'
|
|
176
|
+
'oj': 'oi'
|
|
177
|
+
'ɤːj': 'oei'
|
|
178
|
+
'ɤj': 'oei'
|
|
179
|
+
'uːj': 'ui'
|
|
180
|
+
'uj': 'ui'
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: royin
|
|
3
|
+
id: 1999
|
|
4
|
+
language: tha
|
|
5
|
+
source_script: Thai
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Royal Thai General System of Transcription (1999)
|
|
8
|
+
url: http://www.royin.go.th/wp-content/uploads/royin-ebook/276/FileUpload/758_6484.pdf
|
|
9
|
+
creation_date: 1999
|
|
10
|
+
adoption_date:
|
|
11
|
+
description:
|
|
12
|
+
|
|
13
|
+
notes: |
|
|
14
|
+
|
|
15
|
+
tests:
|
|
16
|
+
- source: 'ภาษาไทย'
|
|
17
|
+
expected: 'phasathai'
|
|
18
|
+
- source: 'ไทย'
|
|
19
|
+
expected: 'thai'
|
|
20
|
+
- source: 'เชียงใหม่'
|
|
21
|
+
expected: 'chiangmai'
|
|
22
|
+
- source: 'ใหม่'
|
|
23
|
+
expected: 'mai'
|
|
24
|
+
- source: 'ใคร'
|
|
25
|
+
expected: 'khrai'
|
|
26
|
+
- source: "ที่"
|
|
27
|
+
expected: "thi"
|
|
28
|
+
- source: "เป็น"
|
|
29
|
+
expected: "pen"
|
|
30
|
+
- source: "ใน"
|
|
31
|
+
expected: "nai"
|
|
32
|
+
- source: "การ"
|
|
33
|
+
expected: "kan"
|
|
34
|
+
- source: "มี"
|
|
35
|
+
expected: "mi"
|
|
36
|
+
- source: "ได้"
|
|
37
|
+
expected: "dai"
|
|
38
|
+
- source: "ของ"
|
|
39
|
+
expected: "khong"
|
|
40
|
+
- source: "ไม่"
|
|
41
|
+
expected: "mai"
|
|
42
|
+
- source: "สถานีปางต้นผึ้ง"
|
|
43
|
+
expected: "sathanipangtonphueng"
|
|
44
|
+
- source: "ไพศาลี"
|
|
45
|
+
expected: "phaisali"
|
|
46
|
+
- source: "โรงเรียนไม้เรียงประชาสรรค์"
|
|
47
|
+
expected: "rongrianmairiangprachasan"
|
|
48
|
+
|
|
49
|
+
map:
|
|
50
|
+
title-case: false
|
|
51
|
+
word_separator: " "
|
|
52
|
+
transcription: "sequitur.pythainlp_lexicon"
|
|
53
|
+
|
|
54
|
+
rules:
|
|
55
|
+
|
|
56
|
+
postrules:
|
|
57
|
+
- pattern: 'chh'
|
|
58
|
+
result: 'ch'
|
|
59
|
+
|
|
60
|
+
characters:
|
|
61
|
+
'[0-9]': ''
|
|
62
|
+
|
|
63
|
+
dictionary:
|
|
64
|
+
'ʰ': 'h'
|
|
65
|
+
'c': 'ch'
|
|
66
|
+
'ː': ''
|
|
67
|
+
'ŋ': 'ng'
|
|
68
|
+
'j': 'i'
|
|
69
|
+
'w': 'o'
|
|
70
|
+
'ɔ': 'o'
|
|
71
|
+
'ɤ': 'oe'
|
|
72
|
+
'ɛ': 'ae'
|
|
73
|
+
'ɯ': 'ue'
|
|
74
|
+
'ʔ' : ''
|
|
75
|
+
't͡ɕ': 'c'
|
|
76
|
+
'.' : ''
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
|
-
authority_id:
|
|
3
|
-
id:
|
|
2
|
+
authority_id: sac
|
|
3
|
+
id: 1979
|
|
4
4
|
language: chn
|
|
5
5
|
source_script: Hans
|
|
6
6
|
destination_script: Latn
|
|
@@ -11,8 +11,7 @@ description: |
|
|
|
11
11
|
This table contains the data on how Unicode Hanzi characters
|
|
12
12
|
are pronounced in P.R.China.
|
|
13
13
|
tests:
|
|
14
|
-
|
|
15
|
-
expected:
|
|
14
|
+
|
|
16
15
|
|
|
17
16
|
notes:
|
|
18
17
|
- Nine tab-separated columns. Column #1 is the Unicode (in hex), Columns #2 to #6 are Pinyin (tone '5' means Qingsheng)
|
|
@@ -13601,8 +13600,8 @@ map:
|
|
|
13601
13600
|
"\u7A21": "zui4"
|
|
13602
13601
|
"\u7A22": "yu4"
|
|
13603
13602
|
"\u7A23": "su1"
|
|
13604
|
-
"\u7A24":
|
|
13605
|
-
"\u7A25":
|
|
13603
|
+
"\u7A24": "lue4"
|
|
13604
|
+
"\u7A25": "xiang1"
|
|
13606
13605
|
"\u7A26": "yi1"
|
|
13607
13606
|
"\u7A27": "xi4"
|
|
13608
13607
|
"\u7A28": "bian1"
|
|
@@ -21054,7 +21053,7 @@ map:
|
|
|
21054
21053
|
"\u92DF": "qin2"
|
|
21055
21054
|
"\u92E0": "shen4"
|
|
21056
21055
|
"\u92E1": "han2"
|
|
21057
|
-
"\u92E2":
|
|
21056
|
+
"\u92E2": "lue4"
|
|
21058
21057
|
"\u92E3": "ye2"
|
|
21059
21058
|
"\u92E4": "chu2"
|
|
21060
21059
|
"\u92E5": "zeng4"
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: stategeocadastre
|
|
3
|
+
id: 1993
|
|
4
|
+
language: ukr
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: PROVISIONAL RULES OF REPRODUCING LETTERS OF THE UKRAINIAN ALPHABET WITH LATIN (ENGLISH) CHARACTERS
|
|
8
|
+
url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/17th-gegn-docs/17th_gegn_WP73.pdf
|
|
9
|
+
creation_date: 1993
|
|
10
|
+
description: |
|
|
11
|
+
These Rules are intended for Romanized transliteration of Ukrainian
|
|
12
|
+
geographic names in international cartographic editions.
|
|
13
|
+
|
|
14
|
+
Geographic names of Russia, Byelorussia, Bulgaria and other states
|
|
15
|
+
using the Cyrillic alphabet are transliterated according to rules
|
|
16
|
+
accepted in those states.
|
|
17
|
+
|
|
18
|
+
These Rules come into effect from the moment of their approval by
|
|
19
|
+
the Main Administration of Geodesy, Cartography and Cadastre and
|
|
20
|
+
will be effective until the introduction of a State standard of
|
|
21
|
+
Ukraine regulating the Romanized transliteration of the Ukrainian
|
|
22
|
+
alphabet.
|
|
23
|
+
|
|
24
|
+
notes:
|
|
25
|
+
- No apostrophe (’) is used in transliteration, the combination "ьо" is transliterated as "io"
|
|
26
|
+
- Use of capitals in Latin version of Ukrainian geographic names corresponds to the Ukrainian spelling
|
|
27
|
+
- Generic geographical terms standing before or after a name in full or abbreviated form are transliterated
|
|
28
|
+
- Romanized versions of complex and compound Ukrainian toponyms (one word, hyphenated or separate words) will follow the Ukrainian spelling
|
|
29
|
+
- In indexes of Romanized geographical names entries must be arranged in the order of the Latin (English) alphabet
|
|
30
|
+
- Geographic names of Russia, Byelorussia, Bulgaria and other states using the Cyrillic alphabet are transliterated according to rules accepted in those states.
|
|
31
|
+
- "Ed: There seems to be a mistake in the source document. 'ц' should be replaced with 'ts' instead of 'tz'."
|
|
32
|
+
|
|
33
|
+
tests:
|
|
34
|
+
- source: Кам’янка # note[1]
|
|
35
|
+
expected: Kamianka
|
|
36
|
+
- source: Сьомаки # note[1]
|
|
37
|
+
expected: Siomaky
|
|
38
|
+
- source: Усть-Чорна # note[2]
|
|
39
|
+
expected: Ust’-Chorna
|
|
40
|
+
- source: Чорне море # note[2]
|
|
41
|
+
expected: Chorne more
|
|
42
|
+
- source: оз. Сиваш # note[3]
|
|
43
|
+
expected: oz. Syvash
|
|
44
|
+
- source: Кримський канал # note[3]
|
|
45
|
+
expected: Kryms’kyi kanal # ! Example had typo in original document "Krums’kyi kanal"
|
|
46
|
+
- source: Гола Пристань
|
|
47
|
+
expected: Hola Prystan’
|
|
48
|
+
- source: Корсунь Шевченківський
|
|
49
|
+
expected: Korsun’ Shevchenkivs’kyi
|
|
50
|
+
- source: Верхньодніпровськ
|
|
51
|
+
expected: Verkhniodniprovs’k
|
|
52
|
+
- source: Варва
|
|
53
|
+
expected: Varva
|
|
54
|
+
- source: Броди
|
|
55
|
+
expected: Brody
|
|
56
|
+
- source: Верховина
|
|
57
|
+
expected: Verkhovyna
|
|
58
|
+
- source: Глухів
|
|
59
|
+
expected: Hlukhiv
|
|
60
|
+
- source: Великий
|
|
61
|
+
expected: Velykyi
|
|
62
|
+
- source: Ґрунь(гора)
|
|
63
|
+
expected: Grun’(hora)
|
|
64
|
+
- source: Димер
|
|
65
|
+
expected: Dymer
|
|
66
|
+
- source: Срібне
|
|
67
|
+
expected: Sribne
|
|
68
|
+
- source: Євпаторія
|
|
69
|
+
expected: Yevpatoriia
|
|
70
|
+
- source: Єнакієве
|
|
71
|
+
expected: Yenakiieve
|
|
72
|
+
- source: Жолква
|
|
73
|
+
expected: Zholkva
|
|
74
|
+
- source: Затока
|
|
75
|
+
expected: Zatoka
|
|
76
|
+
- source: Житомир
|
|
77
|
+
expected: Zhytomyr
|
|
78
|
+
- source: Інгул
|
|
79
|
+
expected: Inhul
|
|
80
|
+
- source: Зміїв
|
|
81
|
+
expected: Zmiïv
|
|
82
|
+
- source: Йосипівка
|
|
83
|
+
expected: Yosypivka
|
|
84
|
+
- source: Стрий
|
|
85
|
+
expected: Stryi
|
|
86
|
+
- source: Калуш
|
|
87
|
+
expected: Kalush
|
|
88
|
+
- source: Лубни
|
|
89
|
+
expected: Lubny
|
|
90
|
+
- source: Миколаїв
|
|
91
|
+
expected: Mykolaïv
|
|
92
|
+
- source: Ніжин
|
|
93
|
+
expected: Nizhyn
|
|
94
|
+
- source: Острог
|
|
95
|
+
expected: Ostroh
|
|
96
|
+
- source: Печеніги
|
|
97
|
+
expected: Pechenihy
|
|
98
|
+
- source: Рівне
|
|
99
|
+
expected: Rivne
|
|
100
|
+
- source: Сарата
|
|
101
|
+
expected: Sarata
|
|
102
|
+
- source: Тячів
|
|
103
|
+
expected: Tiachiv
|
|
104
|
+
- source: Узин
|
|
105
|
+
expected: Uzyn
|
|
106
|
+
- source: Форос
|
|
107
|
+
expected: Foros
|
|
108
|
+
- source: Харків
|
|
109
|
+
expected: Kharkiv
|
|
110
|
+
- source: Цюрупінськ
|
|
111
|
+
expected: Tsiurupins’k
|
|
112
|
+
- source: Черемош
|
|
113
|
+
expected: Cheremosh
|
|
114
|
+
- source: Шацьк
|
|
115
|
+
expected: Shats’k
|
|
116
|
+
- source: Щорс
|
|
117
|
+
expected: Shchors
|
|
118
|
+
- source: Хмельницький
|
|
119
|
+
expected: Khmel’nyts’kyi # ! Example had typo in original document "Khmel’nyts’ky"
|
|
120
|
+
- source: Юрівка
|
|
121
|
+
expected: Yurivka
|
|
122
|
+
- source: Любеч
|
|
123
|
+
expected: Liubech
|
|
124
|
+
- source: Ялта
|
|
125
|
+
expected: Yalta
|
|
126
|
+
- source: Ясіня
|
|
127
|
+
expected: Yasinia
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
map:
|
|
131
|
+
rules:
|
|
132
|
+
- pattern: (?<!\b\u2019)\b\u0404 # Є in initial position -> Ye
|
|
133
|
+
result: Ye
|
|
134
|
+
- pattern: (?<!\b\u2019)\b\u0454 # є in initial position -> ye
|
|
135
|
+
result: ye
|
|
136
|
+
- pattern: (?<!\b\u2019)\b\u0419 # Й in initial position -> Y
|
|
137
|
+
result: "Y"
|
|
138
|
+
- pattern: (?<!\b\u2019)\b\u0439 # й in initial position -> y
|
|
139
|
+
result: "y"
|
|
140
|
+
- pattern: (?<!\b\u2019)\b\u042e # Ю in initial position -> Yu
|
|
141
|
+
result: Yu
|
|
142
|
+
- pattern: (?<!\b\u2019)\b\u044e # ю in initial position -> yu
|
|
143
|
+
result: yu
|
|
144
|
+
- pattern: (?<!\b\u2019)\b\u042f # Я in initial position -> Ya
|
|
145
|
+
result: Ya
|
|
146
|
+
- pattern: (?<!\b\u2019)\b\u044f # я in initial position -> ya
|
|
147
|
+
result: ya
|
|
148
|
+
# note[1]
|
|
149
|
+
- pattern: \b\u2019\b # remove ’
|
|
150
|
+
result: ""
|
|
151
|
+
- pattern: \u042c\u041e
|
|
152
|
+
result: "IO"
|
|
153
|
+
- pattern: \u044c\u043e
|
|
154
|
+
result: "io"
|
|
155
|
+
|
|
156
|
+
characters:
|
|
157
|
+
"\u0410": "A" # А
|
|
158
|
+
"\u0411": "B" # Б
|
|
159
|
+
"\u0412": "V" # В
|
|
160
|
+
"\u0413": "H" # Г
|
|
161
|
+
"\u0490": "G" # Ґ
|
|
162
|
+
"\u0414": "D" # Д
|
|
163
|
+
"\u0415": "E" # Е
|
|
164
|
+
"\u0404": "Ie" # Є
|
|
165
|
+
"\u0416": "Zh" # Ж
|
|
166
|
+
"\u0417": "Z" # З
|
|
167
|
+
"\u0418": "Y" # И
|
|
168
|
+
"\u0406": "I" # І
|
|
169
|
+
"\u0407": "I\u0308" # Ї
|
|
170
|
+
"\u0419": "I" # Й
|
|
171
|
+
"\u041a": "K" # К
|
|
172
|
+
"\u041b": "L" # Л
|
|
173
|
+
"\u041c": "M" # М
|
|
174
|
+
"\u041d": "N" # Н
|
|
175
|
+
"\u041e": "O" # О
|
|
176
|
+
"\u041f": "P" # П
|
|
177
|
+
"\u0420": "R" # Р
|
|
178
|
+
"\u0421": "S" # С
|
|
179
|
+
"\u0422": "T" # Т
|
|
180
|
+
"\u0423": "U" # У
|
|
181
|
+
"\u0424": "F" # Ф
|
|
182
|
+
"\u0425": "Kh" # Х
|
|
183
|
+
"\u0426": "Ts" # Ц note[7]
|
|
184
|
+
"\u0427": "Ch" # Ч
|
|
185
|
+
"\u0428": "Sh" # Ш
|
|
186
|
+
"\u0429": "Shch" # Щ
|
|
187
|
+
"\u042c": "\u2019" # Ь
|
|
188
|
+
"\u042e": "Iu" # Ю
|
|
189
|
+
"\u042f": "Ia" # Я
|
|
190
|
+
"\u0430": "a" # а
|
|
191
|
+
"\u0431": "b" # б
|
|
192
|
+
"\u0432": "v" # в
|
|
193
|
+
"\u0433": "h" # г
|
|
194
|
+
"\u0491": "g" # ґ
|
|
195
|
+
"\u0434": "d" # д
|
|
196
|
+
"\u0435": "e" # е
|
|
197
|
+
"\u0454": "ie" # є
|
|
198
|
+
"\u0436": "zh" # ж
|
|
199
|
+
"\u0437": "z" # з
|
|
200
|
+
"\u0438": "y" # и
|
|
201
|
+
"\u0456": "i" # і
|
|
202
|
+
"\u0457": "i\u0308" # ї
|
|
203
|
+
"\u0439": "i" # й
|
|
204
|
+
"\u043a": "k" # к
|
|
205
|
+
"\u043b": "l" # л
|
|
206
|
+
"\u043c": "m" # м
|
|
207
|
+
"\u043d": "n" # н
|
|
208
|
+
"\u043e": "o" # о
|
|
209
|
+
"\u043f": "p" # п
|
|
210
|
+
"\u0440": "r" # р
|
|
211
|
+
"\u0441": "s" # с
|
|
212
|
+
"\u0442": "t" # т
|
|
213
|
+
"\u0443": "u" # у
|
|
214
|
+
"\u0444": "f" # ф
|
|
215
|
+
"\u0445": "kh" # х
|
|
216
|
+
"\u0446": "ts" # ц note[7]
|
|
217
|
+
"\u0447": "ch" # ч
|
|
218
|
+
"\u0448": "sh" # ш
|
|
219
|
+
"\u0449": "shch" # щ
|
|
220
|
+
"\u044e": "iu" # ю
|
|
221
|
+
"\u044f": "ia" # я
|
|
222
|
+
"\u044c": "\u2019" # ь
|