interscript 0.1.0 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +250 -17
- data/bin/interscript +36 -17
- data/bin/rspec +29 -0
- data/bin/setup +8 -0
- data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
- data/lib/g2pwrapper.py +34 -0
- data/lib/interscript-opal.rb +2 -0
- data/lib/interscript.rb +138 -38
- data/lib/interscript/command.rb +28 -0
- data/lib/interscript/fs.rb +69 -0
- data/lib/interscript/mapping.rb +142 -0
- data/lib/interscript/opal.rb +23 -0
- data/lib/interscript/opal/maps.js.erb +7 -0
- data/lib/interscript/opal_map_translate.rb +12 -0
- data/lib/interscript/version.rb +1 -1
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +141 -0
- data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
- data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
- data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
- data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
- data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
- data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
- data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +222 -0
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
- data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +175 -0
- data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +169 -0
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
- data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
- data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +108 -0
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +184 -0
- data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +38 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
- data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +93 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +314 -0
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +163 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
- data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
- data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
- data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
- data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
- data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
- data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
- data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +141 -0
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +122 -0
- data/maps/icao-heb-Hebr-Latn-9303.yaml +151 -0
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +117 -0
- data/maps/icao-per-Arab-Latn-9303.yaml +104 -0
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +118 -0
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +117 -0
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +120 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +272 -0
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
- data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
- data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +110 -0
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
- data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
- data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
- data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
- data/maps/odni-mkd-cyrl-latn-2015.yaml +122 -0
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
- data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
- data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
- data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +167 -0
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
- data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
- data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
- data/maps/sac-zho-Hans-Latn-1979.yaml +24759 -0
- data/maps/ses-ara-arab-latn-1930.yaml +275 -0
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
- data/maps/un-ara-Arab-Latn-1971.yaml +127 -0
- data/maps/un-ara-Arab-Latn-1972.yaml +152 -0
- data/maps/un-ara-Arab-Latn-2017.yaml +383 -0
- data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
- data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
- data/maps/un-mon-Mong-Latn-2013.yaml +93 -0
- data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
- data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
- data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
- data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
- data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
- data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
- data/spec/interscript/mapping_spec.rb +42 -0
- data/spec/interscript_spec.rb +26 -0
- data/spec/spec_helper.rb +3 -0
- metadata +295 -11
@@ -0,0 +1,108 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 1981
|
4
|
+
language: arm
|
5
|
+
source_script: Armn
|
6
|
+
destination_script: Latn
|
7
|
+
name: BGN/PCGN 1981 System
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/810208/ROMANIZATION_OF_ARMENIAN.pdf
|
9
|
+
creation_date: 2013
|
10
|
+
confirmation_date: 2019-06
|
11
|
+
description: |
|
12
|
+
The BGN/PCGN system for Armenian was designed for use in romanizing
|
13
|
+
names written in the Armenian alphabet. The Roman letters and letter
|
14
|
+
combinations shown as equivalents to the Armenian characters reflect
|
15
|
+
the eastern variety of Armenian, i.e. the language spoken in the
|
16
|
+
Republic of Armenia.
|
17
|
+
|
18
|
+
notes:
|
19
|
+
- The character ե should be romanized ye initially and after the vowel characters ա, ե, է, ը, ի, ո, ու and օ. In all other instances, it should be romanized e.
|
20
|
+
- The character ո should be romanized vo initially except in the word ով, which should be romanized ov. In all other instances, it should be romanized o.
|
21
|
+
- In Soviet-era sources this upper-case digraph character is found as Եի (Unicode encoding 0535+056B).
|
22
|
+
- This lower-case character may be seen either in digraph form as եւ (Unicode encoding 0565+0582) or in single character form as և (Unicode encoding 0587).
|
23
|
+
- The characters ԵՎ, եւ and և should be romanized yev initially, in isolation, and after the vowel characters ա, ե, է, ը, ի, ո, ու, and օ. In all other instances these characters should be romanized ev.
|
24
|
+
- All apostrophes appearing in Armenian romanization are encoded Unicode 2019.
|
25
|
+
- The Romanization column shows only lowercase forms but, when romanizing, uppercase and lowercase Roman letters as appropriate should be used.
|
26
|
+
|
27
|
+
tests:
|
28
|
+
|
29
|
+
map:
|
30
|
+
characters:
|
31
|
+
'\u0531' : 'A'
|
32
|
+
'\u0532' : 'B'
|
33
|
+
'\u0533' : 'G'
|
34
|
+
'\u0534' : 'D'
|
35
|
+
'\u0535' : 'Ye' #treated same as Russian 'ye'
|
36
|
+
'\u0536' : 'Z'
|
37
|
+
'\u0537' : 'E'
|
38
|
+
'\u0538' : 'Y'
|
39
|
+
'\u0539' : 'T\u2019'
|
40
|
+
'\u053a' : 'Zh'
|
41
|
+
'\u053b' : 'I'
|
42
|
+
'\u053c' : 'L'
|
43
|
+
'\u053d' : 'Kh'
|
44
|
+
'\u053e' : 'Ts'
|
45
|
+
'\u053f' : 'K'
|
46
|
+
'\u0540' : 'H'
|
47
|
+
'\u0541' : 'Dz'
|
48
|
+
'\u0542' : 'Gh'
|
49
|
+
'\u0543' : 'Ch'
|
50
|
+
'\u0544' : 'M'
|
51
|
+
'\u0545' : 'Y'
|
52
|
+
'\u0546' : 'N'
|
53
|
+
'\u0547' : 'Sh'
|
54
|
+
'\u0548' : 'O' # VO initially and U when in combination with \u0552
|
55
|
+
'\u0549' : 'Ch\u2019'
|
56
|
+
'\u054a' : 'P'
|
57
|
+
'\u054b' : 'J'
|
58
|
+
'\u054c' : 'Rr'
|
59
|
+
'\u054d' : 'S'
|
60
|
+
'\u054e' : 'V'
|
61
|
+
'\u054f' : 'T'
|
62
|
+
'\u0550' : 'R'
|
63
|
+
'\u0551' : 'Ts\u2019'
|
64
|
+
'\u0548\u0552' : 'U'
|
65
|
+
'\u0548\u0582' : 'U'
|
66
|
+
'\u0553' : 'P\u2019'
|
67
|
+
'\u0554' : 'K\u2019'
|
68
|
+
'\u0555' : 'O'
|
69
|
+
'\u0556' : 'F'
|
70
|
+
'\u0561' : 'a'
|
71
|
+
'\u0562' : 'b'
|
72
|
+
'\u0563' : 'g'
|
73
|
+
'\u0564' : 'd'
|
74
|
+
'\u0565' : 'e' # ye initially
|
75
|
+
'\u0566' : 'z'
|
76
|
+
'\u0567' : 'e'
|
77
|
+
'\u0568' : 'y'
|
78
|
+
'\u0569' : 't\u2019'
|
79
|
+
'\u056a' : 'zh'
|
80
|
+
'\u056b' : 'i'
|
81
|
+
'\u056c' : 'l'
|
82
|
+
'\u056d' : 'kh'
|
83
|
+
'\u056e' : 'ts'
|
84
|
+
'\u056f' : 'k'
|
85
|
+
'\u0570' : 'h'
|
86
|
+
'\u0571' : 'dz'
|
87
|
+
'\u0572' : 'gh'
|
88
|
+
'\u0573' : 'ch'
|
89
|
+
'\u0574' : 'm'
|
90
|
+
'\u0575' : 'y'
|
91
|
+
'\u0576' : 'n'
|
92
|
+
'\u0577' : 'sh'
|
93
|
+
'\u0578' : 'o' # vo initially and u when in combination with \u0582
|
94
|
+
'\u0579' : 'ch\u2019'
|
95
|
+
'\u057a' : 'p'
|
96
|
+
'\u057b' : 'j'
|
97
|
+
'\u057c' : 'rr'
|
98
|
+
'\u057d' : 's'
|
99
|
+
'\u057e' : 'v'
|
100
|
+
'\u057f' : 't'
|
101
|
+
'\u0580' : 'r'
|
102
|
+
'\u0581' : 'ts\u2019'
|
103
|
+
'\u0578\u0582' : 'u'
|
104
|
+
'\u0583' : 'p\u2019'
|
105
|
+
'\u0584' : 'k\u2019'
|
106
|
+
'\u0585' : 'o'
|
107
|
+
'\u0586' : 'f'
|
108
|
+
'\u0587' : 'ev' # yev initially
|
@@ -0,0 +1,104 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 1993
|
4
|
+
language: aze
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: AZERBAIJANI TABLE OF CORRESPONDENCES CYRILLIC-ROMAN -- BGN/PCGN 1993 Agreement
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816656/TABLE_OF_CORRESPONDENCES_FOR_AZERBAIJANI.pdf
|
9
|
+
creation_date: 1993
|
10
|
+
confirmation_date: 2019-06
|
11
|
+
description: |
|
12
|
+
Azerbaijani, also known as Azeri, is the official language of the Republic of Azerbaijan. In 1991, the Azerbaijani government adopted the Roman alphabet to replace the existing Cyrillic alphabet. The presentation below provides a table of correspondences between the former Cyrillic alphabet and the current Roman alphabet. When Azerbaijani Roman-alphabet spellings are not available, this table can be used to convert Azerbaijani Cyrillic spellings.
|
13
|
+
|
14
|
+
notes:
|
15
|
+
|
16
|
+
- The special letter Ə, ə known as schwa, should be reproduced in that form whenever encountered. The characters Ə (Unicode 04D8) and ə (Unicode 04D9) should be used for schwa when writing in the Cyrillic script, but characters Ə (Unicode 018F) and ə (Unicode 0259) should be used when writing in the Roman alphabet. In those instances when it cannot be reproduced, however, the letter Ä ä may be substituted for it (see below).
|
17
|
+
|
18
|
+
- The obsolete characters й, э, ю, and я should be romanized ẏ, ė, yu., and ya.
|
19
|
+
|
20
|
+
- Unicode values are shown with the uppercase Cyrillic character first, followed by the lowercase character. It is not known whether there exists an uppercase ‘J’ specific to the Cyrillic character set.
|
21
|
+
|
22
|
+
- |
|
23
|
+
An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the unmodified letters of the basic Roman script is:
|
24
|
+
Ğ (U+011E), ğ (U+011F)
|
25
|
+
Ə (U+018F), ə (U+0259)
|
26
|
+
İ (U+0130), ı (U+0131)
|
27
|
+
Ö (U+00D6), ö (U+00F6)
|
28
|
+
Ü (U+00DC), ü (U+00FC)
|
29
|
+
Ç (U+00C7), ç (U+00E7)
|
30
|
+
Ş (U+015E), ş (U+015F)
|
31
|
+
|
32
|
+
- The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
|
33
|
+
|
34
|
+
tests:
|
35
|
+
- source:
|
36
|
+
expected:
|
37
|
+
|
38
|
+
map:
|
39
|
+
characters:
|
40
|
+
'\u0410' : 'A'
|
41
|
+
'\u0411' : 'B'
|
42
|
+
'\u0412' : 'V'
|
43
|
+
'\u0413' : 'G'
|
44
|
+
'\u0492' : 'Ğ'
|
45
|
+
'\u0414' : 'D'
|
46
|
+
'\u0415' : 'E'
|
47
|
+
'\u04D8' : 'Ə'
|
48
|
+
'\u0416' : 'J'
|
49
|
+
'\u0417' : 'Z'
|
50
|
+
'\u0418' : 'İ'
|
51
|
+
'\u042B' : 'I'
|
52
|
+
'\u0408' : 'Y'
|
53
|
+
'\u041A' : 'K'
|
54
|
+
'\u049C' : 'G'
|
55
|
+
'\u041B' : 'L'
|
56
|
+
'\u041C' : 'M'
|
57
|
+
'\u041D' : 'N'
|
58
|
+
'\u041E' : 'O'
|
59
|
+
'\u04E8' : 'Ö'
|
60
|
+
'\u041F' : 'P'
|
61
|
+
'\u0420' : 'R'
|
62
|
+
'\u0421' : 'S'
|
63
|
+
'\u0422' : 'T'
|
64
|
+
'\u0423' : 'U'
|
65
|
+
'\u04AE' : 'Ü'
|
66
|
+
'\u0424' : 'F'
|
67
|
+
'\u0425' : 'X'
|
68
|
+
'\u04BA' : 'H'
|
69
|
+
'\u0427' : 'Ç'
|
70
|
+
'\u04B8' : 'C'
|
71
|
+
'\u0428' : 'Ş'
|
72
|
+
|
73
|
+
'\u0430' : 'a'
|
74
|
+
'\u0431' : 'b'
|
75
|
+
'\u0432' : 'v'
|
76
|
+
'\u0433' : 'g'
|
77
|
+
'\u0493' : 'ğ'
|
78
|
+
'\u0434' : 'd'
|
79
|
+
'\u0435' : 'e'
|
80
|
+
'\u04D9' : 'ə'
|
81
|
+
'\u0436' : 'j'
|
82
|
+
'\u0437' : 'z'
|
83
|
+
'\u0438' : 'i'
|
84
|
+
'\u044B' : 'ı'
|
85
|
+
'\u0458' : 'y'
|
86
|
+
'\u043A' : 'k'
|
87
|
+
'\u049D' : 'g'
|
88
|
+
'\u043B' : 'l'
|
89
|
+
'\u043C' : 'm'
|
90
|
+
'\u043D' : 'n'
|
91
|
+
'\u043E' : 'o'
|
92
|
+
'\u04E9' : 'ö'
|
93
|
+
'\u043F' : 'p'
|
94
|
+
'\u0440' : 'r'
|
95
|
+
'\u0441' : 's'
|
96
|
+
'\u0442' : 't'
|
97
|
+
'\u0443' : 'u'
|
98
|
+
'\u04AF' : 'ü'
|
99
|
+
'\u0444' : 'f'
|
100
|
+
'\u0445' : 'x'
|
101
|
+
'\u04BB' : 'h'
|
102
|
+
'\u0447' : 'ç'
|
103
|
+
'\u04B9' : 'c'
|
104
|
+
'\u0448' : 'ş'
|
@@ -0,0 +1,184 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 2007
|
4
|
+
language: bak
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: BASHKIR TABLE OF CORRESPONDENCES CYRILLIC-ROMAN BGN/PCGN 2007 Agreement
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/829203/TABLE_OF_CORRESPONDENCES__FOR_BASHKIR.pdf
|
9
|
+
creation_date: 2007
|
10
|
+
confirmation_date: 2019
|
11
|
+
description: |
|
12
|
+
Bashkir is an official language within Respublika Bashkortostan, one of the
|
13
|
+
republics of the Russian Federation. It will normally be encountered in Cyrillic script, in
|
14
|
+
which case it should be romanized by means of the Cyrillic-Roman table of
|
15
|
+
correspondences given below.
|
16
|
+
|
17
|
+
notes:
|
18
|
+
- The letter w is used word initially and before a vowel. # 'and' or 'or' ?
|
19
|
+
- The letter sequence ye is used word initially and before a vowel. # 'and' or 'or' ?
|
20
|
+
- The letter w is used between or after vowels.
|
21
|
+
- The letter w is used after e, u, ö and ə.
|
22
|
+
- |
|
23
|
+
An inventory of letter-diacritic combinations, with their Unicode encoding,
|
24
|
+
in addition to the unmodified letters of the basic Roman script is:
|
25
|
+
Ğ (U+011E) ğ (U+011F)
|
26
|
+
Ź (U+0179) ź (U+017A)
|
27
|
+
Ë (U+00CB) ë (U+00EB)
|
28
|
+
Ñ (U+00D1) ñ (U+00F1)
|
29
|
+
Ö (U+00D6) ö (U+00F6)
|
30
|
+
Ś (U+015A) ś (U+015B)
|
31
|
+
Ü (U+00DC) ü (U+00FC)
|
32
|
+
Ç (U+00C7) ç (U+00E7)
|
33
|
+
Ş (U+015E) ş (U+015F)
|
34
|
+
Ə (U+018F) ə (U+0259)
|
35
|
+
- |
|
36
|
+
The Roman-script columns show only lowercase forms but, when applying the table,
|
37
|
+
uppercase and lowercase Roman letters as appropriate should be used.
|
38
|
+
|
39
|
+
tests:
|
40
|
+
# adopted http://www.eki.ee/knab/lat/kblba.pdf
|
41
|
+
- source: Васйылға
|
42
|
+
expected: Wasyılğa
|
43
|
+
- source: Еҙем
|
44
|
+
expected: Yeźem
|
45
|
+
- source: Раевка
|
46
|
+
expected: Raevka
|
47
|
+
- source: Сәйетҡол
|
48
|
+
expected: Səyetqol
|
49
|
+
- source: Ауырғазы
|
50
|
+
expected: Awırğazı
|
51
|
+
- source: Бурһыҡтау
|
52
|
+
expected: Burhıqtaw
|
53
|
+
- source: Мәләүез
|
54
|
+
expected: Mələwez
|
55
|
+
- source: Ҡыҙылъяр
|
56
|
+
expected: Qıźılyar
|
57
|
+
# adopted https://en.wikipedia.org/wiki/Bashkir_language#Grammar
|
58
|
+
- source: кемдең
|
59
|
+
expected: kemdeñ
|
60
|
+
- source: кем
|
61
|
+
expected: kem
|
62
|
+
- source: был
|
63
|
+
expected: bıl
|
64
|
+
- source: ошо
|
65
|
+
expected: oşo
|
66
|
+
- source: быларҙың
|
67
|
+
expected: bılarźıñ
|
68
|
+
- source: һеҙҙән
|
69
|
+
expected: heźźən
|
70
|
+
- source: һин
|
71
|
+
expected: hin
|
72
|
+
- source: һеҙҙең
|
73
|
+
expected: heźźeñ
|
74
|
+
|
75
|
+
map:
|
76
|
+
rules:
|
77
|
+
# note[1]
|
78
|
+
- pattern: \b\u0412(?=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
|
79
|
+
result: "W"
|
80
|
+
- pattern: \b\u0432(?=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
|
81
|
+
result: "w"
|
82
|
+
# note[2]
|
83
|
+
- pattern: \b\u0415
|
84
|
+
result: "Ye"
|
85
|
+
- pattern: \b\u0435
|
86
|
+
result: "ye"
|
87
|
+
- pattern: (?=\b)\u0415(?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
|
88
|
+
result: "Ye"
|
89
|
+
- pattern: (?=\b)\u0435(?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
|
90
|
+
result: "ye"
|
91
|
+
|
92
|
+
# note[3] # note[4]
|
93
|
+
- pattern: (?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])[\u0423\u04AE]
|
94
|
+
result: W
|
95
|
+
- pattern: (?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])[\u0443\u04AF]
|
96
|
+
result: w
|
97
|
+
|
98
|
+
|
99
|
+
characters:
|
100
|
+
'\u0410': 'A' # А
|
101
|
+
'\u0411': 'B' # Б note[1]
|
102
|
+
'\u0412': 'V' # В
|
103
|
+
'\u0413': 'G' # Г
|
104
|
+
'\u0492': "\u011E" # Ғ
|
105
|
+
'\u0414': 'D' # Д
|
106
|
+
'\u0498': "\u0179" # Ҙ
|
107
|
+
'\u0415': 'E' # Е note[2]
|
108
|
+
'\u0401': 'Ë' # Ё
|
109
|
+
'\u0416': 'J' # Ж
|
110
|
+
'\u0417': 'Z' # З
|
111
|
+
'\u0418': 'I' # И
|
112
|
+
'\u0419': 'Y' # Й
|
113
|
+
'\u041A': 'K' # К
|
114
|
+
'\u04A0': 'Q' # Ҡ
|
115
|
+
'\u041B': 'L' # Л
|
116
|
+
'\u041C': 'M' # М
|
117
|
+
'\u041D': 'N' # Н
|
118
|
+
'\u04A2': 'Ñ' # Ң
|
119
|
+
'\u041E': 'O' # О
|
120
|
+
'\u04E8': "Ö" # Ө
|
121
|
+
'\u041F': 'P' # П
|
122
|
+
'\u0420': 'R' # Р
|
123
|
+
'\u0421': 'S' # С
|
124
|
+
'\u04AA': 'Ś' # Ҫ
|
125
|
+
'\u0422': 'T' # Т
|
126
|
+
'\u0423': 'U' # У
|
127
|
+
'\u04AE': 'Ü' # Ү note[3]
|
128
|
+
'\u0424': 'F' # Ф
|
129
|
+
'\u0425': 'X' # Х
|
130
|
+
'\u04BA': 'H' # Һ
|
131
|
+
'\u0426': 'Ts' # Ц
|
132
|
+
'\u0427': 'Ç' # Ч
|
133
|
+
'\u0428': 'Ş' # Ш
|
134
|
+
'\u0429': 'ŞÇ' # Щ
|
135
|
+
'\u042A': '' # Ъ
|
136
|
+
'\u042B': 'I' # Ы
|
137
|
+
'\u042C': '' # Ь
|
138
|
+
'\u042D': 'E' # Э
|
139
|
+
'\u04D8': "\u018F" # Ә
|
140
|
+
'\u042E': 'Yu' # Ю
|
141
|
+
'\u042F': 'Ya' # Я
|
142
|
+
|
143
|
+
'\u0430': 'a' # а
|
144
|
+
'\u0431': 'b' # б
|
145
|
+
'\u0432': 'v' # в note[1]
|
146
|
+
'\u0433': 'g' # г
|
147
|
+
'\u0493': "\u011F" # ғ
|
148
|
+
'\u0434': 'd' # д
|
149
|
+
'\u0499': 'ź' # ҙ
|
150
|
+
'\u0435': 'e' # e note[2]
|
151
|
+
'\u0451': 'ë' # ё
|
152
|
+
'\u0436': 'j' # ж
|
153
|
+
'\u0437': 'z' # з
|
154
|
+
'\u0438': 'i' # и
|
155
|
+
'\u0439': 'y' # й
|
156
|
+
'\u043A': 'k' # к
|
157
|
+
'\u04A1': 'q' # ҡ
|
158
|
+
'\u043B': 'l' # л
|
159
|
+
'\u043C': 'm' # м
|
160
|
+
'\u043D': 'n' # н
|
161
|
+
'\u04A3': 'ñ' # ң
|
162
|
+
'\u043E': 'o' # о
|
163
|
+
'\u04E9': "\u00F6" # ө
|
164
|
+
'\u043F': 'p' # п
|
165
|
+
'\u0440': 'r' # р
|
166
|
+
'\u0441': 's' # с
|
167
|
+
'\u04AB': 'ś' # ҫ
|
168
|
+
'\u0442': 't' # т
|
169
|
+
'\u0443': 'u' # у
|
170
|
+
"\u04AF": 'ü' # ү note[3]
|
171
|
+
'\u0444': 'f' # ф
|
172
|
+
'\u0445': 'x' # х
|
173
|
+
'\u04BB': 'h' # һ
|
174
|
+
'\u0446': 'ts' # ц
|
175
|
+
'\u0447': 'ç' # ч
|
176
|
+
'\u0448': 'ş' # ш
|
177
|
+
'\u0449': 'şç' # щ
|
178
|
+
'\u044A': '' # ъ
|
179
|
+
'\u044B': "\u0131" # ы
|
180
|
+
'\u044C': '' # ь
|
181
|
+
'\u044D': 'e' # э
|
182
|
+
'\u04D9': "\u0259" # ә
|
183
|
+
'\u044E': 'yu' # ю
|
184
|
+
'\u044F': 'ya' # я
|
@@ -0,0 +1,285 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 1979
|
4
|
+
language: bel
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: United States Board on Geographic Names Foreign Names Committee Staff, 1994. Romanization Systems and Roman-Script Spelling Conventions, p. 23.
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/811510/ROMANIZATION_OF_BELARUSIAN.pdf
|
9
|
+
creation_date: 1979
|
10
|
+
description: |
|
11
|
+
The BGN/PCGN system for Belarusian (formerly referred to as Byelorussian) was designed for use in
|
12
|
+
romanizing names written in the Belarusian Cyrillic alphabet. The Belarusian alphabet contains three
|
13
|
+
characters not present in the Russian alphabet: і, ў, and ’.
|
14
|
+
|
15
|
+
notes:
|
16
|
+
- The character sequences зг, кг, сг, тс and цг may be romanized z·h, k·h, s·h, t·s and ts·h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, x, ш, ц, and the character sequence тш.
|
17
|
+
- All apostrophes appearing in romanization are Unicode encoding 2019.
|
18
|
+
|
19
|
+
tests:
|
20
|
+
- source: Антон
|
21
|
+
expected: Anton
|
22
|
+
- source: Вілейка
|
23
|
+
expected: Vilyeyka
|
24
|
+
- source: Брэст
|
25
|
+
expected: Brest
|
26
|
+
- source: Дубна
|
27
|
+
expected: Dubna
|
28
|
+
- source: Віцебск
|
29
|
+
expected: Vitsyebsk
|
30
|
+
- source: Асіповічы
|
31
|
+
expected: Asipovichy
|
32
|
+
- source: Гродна
|
33
|
+
expected: Hrodna
|
34
|
+
- source: Брагін
|
35
|
+
expected: Brahin
|
36
|
+
- source: Добруш
|
37
|
+
expected: Dobrush
|
38
|
+
- source: Ліда
|
39
|
+
expected: Lida
|
40
|
+
- source: Гомель
|
41
|
+
expected: Homyel’
|
42
|
+
- source: Беліца
|
43
|
+
expected: Byelitsa
|
44
|
+
- source: Ёдкавічы
|
45
|
+
expected: Yodkavichy
|
46
|
+
- source: Нёман
|
47
|
+
expected: Nyoman
|
48
|
+
- source: Жлобін
|
49
|
+
expected: Zhlobin
|
50
|
+
- source: Ружаны
|
51
|
+
expected: Ruzhany
|
52
|
+
- source: Зоя
|
53
|
+
expected: Zoya
|
54
|
+
- source: князь
|
55
|
+
expected: knyaz’
|
56
|
+
- source: Ігнат
|
57
|
+
expected: Ihnat
|
58
|
+
- source: Мінск
|
59
|
+
expected: Minsk
|
60
|
+
- source: Йосель
|
61
|
+
expected: Yosyel’
|
62
|
+
- source: Койданава
|
63
|
+
expected: Koydanava
|
64
|
+
- source: Крапіўна
|
65
|
+
expected: Krapiwna
|
66
|
+
- source: Менск
|
67
|
+
expected: Myensk
|
68
|
+
- source: Лаўна
|
69
|
+
expected: Lawna
|
70
|
+
- source: Лёсік
|
71
|
+
expected: Lyosik
|
72
|
+
- source: Купала
|
73
|
+
expected: Kupala
|
74
|
+
- source: Вілейка
|
75
|
+
expected: Vilyeyka
|
76
|
+
- source: Міхал
|
77
|
+
expected: Mikhal
|
78
|
+
- source: Вільня
|
79
|
+
expected: Vil’nya
|
80
|
+
- source: Лепель
|
81
|
+
expected: Lyepyel’
|
82
|
+
- source: Магілёў
|
83
|
+
expected: Mahilyow
|
84
|
+
- source: Няміга
|
85
|
+
expected: Nyamiha
|
86
|
+
- source: Наваградак
|
87
|
+
expected: Navahradak
|
88
|
+
- source: Баранавічы
|
89
|
+
expected: Baranavichy
|
90
|
+
- source: Орша
|
91
|
+
expected: Orsha
|
92
|
+
- source: Востраў
|
93
|
+
expected: Vostraw
|
94
|
+
- source: Пінск
|
95
|
+
expected: Pinsk
|
96
|
+
- source: Дняпро
|
97
|
+
expected: Dnyapro
|
98
|
+
- source: Рагачоў
|
99
|
+
expected: Rahachow
|
100
|
+
- source: Сураж
|
101
|
+
expected: Surazh
|
102
|
+
- source: Смаляны
|
103
|
+
expected: Smalyany
|
104
|
+
- source: Арэса
|
105
|
+
expected: Aresa
|
106
|
+
- source: Рось
|
107
|
+
expected: Ros’
|
108
|
+
- source: Талочын
|
109
|
+
expected: Talochyn
|
110
|
+
- source: Масты
|
111
|
+
expected: Masty
|
112
|
+
- source: Уладзімір
|
113
|
+
expected: Uladzimir
|
114
|
+
- source: Бабруйск
|
115
|
+
expected: Babruysk
|
116
|
+
- source: Быхаў
|
117
|
+
expected: Bykhaw
|
118
|
+
- source: Воўпа
|
119
|
+
expected: Vowpa
|
120
|
+
- source: Іўе
|
121
|
+
expected: Iwye
|
122
|
+
- source: Фолюш
|
123
|
+
expected: Folyush
|
124
|
+
- source: фортка
|
125
|
+
expected: fortka
|
126
|
+
- source: Хатынь
|
127
|
+
expected: Khatyn’
|
128
|
+
- source: Быхаў
|
129
|
+
expected: Bykhaw
|
130
|
+
- source: Ганцавічы
|
131
|
+
expected: Hantsavichy
|
132
|
+
- source: Стоўбцы
|
133
|
+
expected: Stowbtsy
|
134
|
+
- source: цьмяны
|
135
|
+
expected: ts’myany
|
136
|
+
- source: мясцовы
|
137
|
+
expected: myastsovy
|
138
|
+
- source: Астравец
|
139
|
+
expected: Astravyets
|
140
|
+
- source: Прыпяць
|
141
|
+
expected: Prypyats’
|
142
|
+
- source: Чэрыкаў
|
143
|
+
expected: Cherykaw
|
144
|
+
- source: Шчара
|
145
|
+
expected: Shchara
|
146
|
+
- source: Нарач
|
147
|
+
expected: Narach
|
148
|
+
- source: Шклоў
|
149
|
+
expected: Shklow
|
150
|
+
- source: Ашмяны
|
151
|
+
expected: Ashmyany
|
152
|
+
- source: Ыттык-Кёль
|
153
|
+
expected: Yttyk-Kyol’
|
154
|
+
- source: Кобрын
|
155
|
+
expected: Kobryn
|
156
|
+
- source: Солы
|
157
|
+
expected: Soly
|
158
|
+
- source: Копысь
|
159
|
+
expected: Kopys’
|
160
|
+
- source: рунь
|
161
|
+
expected: run’
|
162
|
+
- source: Эйсманты
|
163
|
+
expected: Eysmanty
|
164
|
+
- source: Крэва
|
165
|
+
expected: Kreva
|
166
|
+
- source: Юры
|
167
|
+
expected: Yury
|
168
|
+
- source: уюн
|
169
|
+
expected: uyun
|
170
|
+
- source: Язэп
|
171
|
+
expected: Yazep
|
172
|
+
- source: Івянец
|
173
|
+
expected: Ivyanyets
|
174
|
+
- source: з’езд
|
175
|
+
expected: z”yezd
|
176
|
+
- source: Вялiкiя Вераб’евічы
|
177
|
+
expected: Vyalikiya Vyerab”yevichy
|
178
|
+
- source: Дзям’янаўцы
|
179
|
+
expected: Dzyam”yanawtsy
|
180
|
+
- source: Задвор’е
|
181
|
+
expected: Zadvor”ye
|
182
|
+
- source: Гезгалы
|
183
|
+
expected: Hyez·haly
|
184
|
+
- source: Вадасховішча Гезгальскае
|
185
|
+
expected: Vadaskhovishcha Hyez·hal’skaye
|
186
|
+
|
187
|
+
map:
|
188
|
+
postrules:
|
189
|
+
- pattern: '\u042C' # Ь
|
190
|
+
result: "\u2019"
|
191
|
+
- pattern: '\u044C' # ь
|
192
|
+
result: "\u2019"
|
193
|
+
# Per documentation those rules are optional
|
194
|
+
rules:
|
195
|
+
- pattern: \u0417\u0413 # ЗГ
|
196
|
+
result: "Z\u00B7H" # Z·H
|
197
|
+
- pattern: \u0437\u0433 # зг
|
198
|
+
result: "z\u00B7h" # z·h
|
199
|
+
- pattern: \u041A\u0413 # КГ
|
200
|
+
result: "K\u00B7H" # K·H
|
201
|
+
- pattern: \u043A\u0433 # кг
|
202
|
+
result: "k\u00B7h" # k·h
|
203
|
+
- pattern: \u0421\u0413 # СГ
|
204
|
+
result: "S\u00B7H" # S·H
|
205
|
+
- pattern: \u0441\u0433 # сг
|
206
|
+
result: "s\u00B7h" # s·h
|
207
|
+
- pattern: \u0422\u0421 # ТС
|
208
|
+
result: "T\u00B7S" # T·S
|
209
|
+
- pattern: \u0442\u0441 # тс
|
210
|
+
result: "t\u00B7s" # t·s
|
211
|
+
- pattern: \u0426\u0413 # ЦГ
|
212
|
+
result: "TS\u00B7H" # TS·H
|
213
|
+
- pattern: \u0446\u0433 # цг
|
214
|
+
result: "ts\u00B7h" # ts·h
|
215
|
+
|
216
|
+
characters:
|
217
|
+
'\u00B4' : "\u201D" # apostrophe according to spec
|
218
|
+
'\u02BC' : "\u201D" # apostrophe according to spec
|
219
|
+
'\u2019' : "\u201D" # apostrophe in actual examples
|
220
|
+
|
221
|
+
'\u0410' : 'A' # A
|
222
|
+
'\u0411' : 'B' # Б
|
223
|
+
'\u0412' : 'V' # B
|
224
|
+
'\u0413' : 'H' # Г
|
225
|
+
'\u0414' : 'D' # Д
|
226
|
+
'\u0415' : 'Ye' # Е
|
227
|
+
'\u0401' : 'Yo' # Ё
|
228
|
+
'\u0416' : 'Zh' # Ж
|
229
|
+
'\u0417' : 'Z' # З
|
230
|
+
'\u0406' : 'I' # І
|
231
|
+
'\u0419' : 'Y' # Й
|
232
|
+
'\u041A' : 'K' # К
|
233
|
+
'\u041B' : 'L' # Л
|
234
|
+
'\u041C' : 'M' # М
|
235
|
+
'\u041D' : 'N' # Н
|
236
|
+
'\u041E' : 'O' # О
|
237
|
+
'\u041F' : 'P' # П
|
238
|
+
'\u0420' : 'R' # Р
|
239
|
+
'\u0421' : 'S' # С
|
240
|
+
'\u0422' : 'T' # Т
|
241
|
+
'\u0423' : 'U' # У
|
242
|
+
'\u040E' : 'W' # Ў
|
243
|
+
'\u0424' : 'F' # Ф
|
244
|
+
'\u0425' : 'Kh' # Х
|
245
|
+
'\u0426' : 'Ts' # Ц
|
246
|
+
'\u0427' : 'Ch' # Ч
|
247
|
+
'\u0428' : 'Sh' # Ш
|
248
|
+
'\u042B' : 'Y' # Ы
|
249
|
+
'\u042D' : 'E' # Э
|
250
|
+
'\u042E' : 'Yu' # Ю
|
251
|
+
'\u042F' : 'Ya' # Я
|
252
|
+
'\u0490' : 'G' # Ґ
|
253
|
+
|
254
|
+
'\u0430' : 'a' # а
|
255
|
+
'\u0431' : 'b' # б
|
256
|
+
'\u0432' : 'v' # в
|
257
|
+
'\u0433' : 'h' # г
|
258
|
+
'\u0434' : 'd' # д
|
259
|
+
'\u0435' : 'ye' # е
|
260
|
+
'\u0451' : 'yo' # ё
|
261
|
+
'\u0436' : 'zh' # ж
|
262
|
+
'\u0437' : 'z' # з
|
263
|
+
'\u0456' : 'i' # і
|
264
|
+
'\u0439' : 'y' # й
|
265
|
+
'\u043A' : 'k' # к
|
266
|
+
'\u043B' : 'l' # л
|
267
|
+
'\u043C' : 'm' # м
|
268
|
+
'\u043D' : 'n' # н
|
269
|
+
'\u043E' : 'o' # о
|
270
|
+
'\u043F' : 'p' # п
|
271
|
+
'\u0440' : 'r' # р
|
272
|
+
'\u0441' : 's' # с
|
273
|
+
'\u0442' : 't' # т
|
274
|
+
'\u0443' : 'u' # у
|
275
|
+
'\u045E' : 'w' # ў
|
276
|
+
'\u0444' : 'f' # ф
|
277
|
+
'\u0445' : 'kh' # х
|
278
|
+
'\u0446' : 'ts' # ц
|
279
|
+
'\u0447' : 'ch' # ч
|
280
|
+
'\u0448' : 'sh' # ш
|
281
|
+
'\u044B' : 'y' # ы
|
282
|
+
'\u044D' : 'e' # э
|
283
|
+
'\u044E' : 'yu' # ю
|
284
|
+
'\u044F' : 'ya' # я
|
285
|
+
'\u0491' : 'g' # ґ
|