interscript 0.1.2 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +250 -17
- data/lib/g2pwrapper.py +34 -0
- data/lib/interscript.rb +142 -20
- data/lib/interscript/command.rb +28 -0
- data/lib/interscript/fs.rb +69 -0
- data/lib/interscript/mapping.rb +142 -0
- data/lib/interscript/opal.rb +57 -0
- data/lib/interscript/opal/entrypoint.rb +12 -0
- data/lib/interscript/opal/map_translate.rb +7 -0
- data/lib/interscript/opal/maps.js.erb +10 -0
- data/lib/interscript/version.rb +1 -1
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +38916 -0
- data/maps/alalc-amh-Ethi-Latn-1997.yaml +513 -0
- data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
- data/maps/alalc-ara-Arab-Latn-1997.yaml +1287 -0
- data/maps/alalc-asm-Deva-Latn-1997.yaml +165 -0
- data/maps/alalc-asm-Deva-Latn-2012.yaml +40 -0
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +145 -0
- data/maps/alalc-bel-Cyrl-Latn-1997.yaml +129 -0
- data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +98 -0
- data/maps/alalc-ell-Grek-Latn-1997.yaml +628 -0
- data/maps/alalc-ell-Grek-Latn-2010.yaml +626 -0
- data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
- data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
- data/maps/alalc-hin-Deva-Latn-1997.yaml +211 -0
- data/maps/alalc-hin-Deva-Latn-2011.yaml +47 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +111 -0
- data/maps/alalc-kat-Geor-Latn-1997.yaml +150 -0
- data/maps/alalc-kor-Hang-Latn-1997.yaml +98 -0
- data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
- data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
- data/maps/alalc-mar-Deva-Latn-1997.yaml +189 -0
- data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
- data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +114 -0
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
- data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
- data/maps/alalc-pan-Guru-Latn-1997.yaml +256 -0
- data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
- data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
- data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
- data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +225 -0
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
- data/maps/alalc-san-Deva-Latn-2012.yaml +172 -0
- data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
- data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +118 -0
- data/maps/alalc-srp-Cyrl-Latn-2013.yaml +135 -0
- data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
- data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +145 -0
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
- data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +58 -33
- data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +55 -35
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +292 -0
- data/maps/bgn-kor-Hang-Latn-1943.yaml +35 -0
- data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
- data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +532 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +596 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +2 -3
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +188 -0
- data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +289 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +119 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +15 -65
- data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +705 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +23 -0
- data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -2
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +131 -0
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +42 -0
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +163 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
- data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
- data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +230 -0
- data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +336 -0
- data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +639 -0
- data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
- data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +150 -65
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +170 -0
- data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
- data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +80 -4
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +119 -0
- data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
- data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
- data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
- data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
- data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
- data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
- data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +181 -0
- data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
- data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
- data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
- data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
- data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +172 -0
- data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
- data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
- data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
- data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
- data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
- data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
- data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
- data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
- data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
- data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
- data/maps/dos-nep-Deva-Latn-1997.yaml +47 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +684 -0
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +680 -0
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +19 -0
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +31 -0
- data/maps/ggg-kat-Geor-Latn-2002.yaml +92 -0
- data/maps/gki-bel-Cyrl-Latn-1992.yaml +33 -0
- data/maps/gki-bel-Cyrl-Latn-2000.yaml +201 -0
- data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +190 -0
- data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
- data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +109 -98
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +2 -7
- data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +6 -8
- data/maps/icao-heb-Hebr-Latn-9303.yaml +119 -125
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +2 -3
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +2 -4
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +2 -3
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +2 -4
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
- data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
- data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +613 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +44 -0
- data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
- data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
- data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +66 -0
- data/maps/iso-kan-Knda-Latn-15919-2001.yaml +220 -0
- data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
- data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
- data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
- data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
- data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
- data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
- data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
- data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
- data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
- data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
- data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
- data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
- data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
- data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
- data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +4 -6
- data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
- data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
- data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
- data/maps/kp-kor-Hang-Latn-2002.yaml +909 -0
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
- data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +163 -0
- data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +200 -0
- data/maps/moct-kor-Hang-Latn-2000.yaml +807 -0
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
- data/maps/odni-ara-Arab-Latn-2015.yaml +425 -0
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
- data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
- data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
- data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +87 -0
- data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
- data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
- data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
- data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
- data/maps/odni-per-Arab-Latn-2015.yaml +228 -0
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
- data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
- data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
- data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
- data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
- data/maps/royin-tha-Thai-Latn-1968.yaml +183 -0
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
- data/maps/royin-tha-Thai-Latn-1999.yaml +80 -0
- data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +11 -8
- data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
- data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
- data/maps/ses-ara-Arab-Latn-1930.yaml +283 -0
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +197 -0
- data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
- data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
- data/maps/un-amh-Ethi-Latn-2016.yaml +602 -0
- data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
- data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
- data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
- data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
- data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
- data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +780 -0
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +31 -0
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +19 -0
- data/maps/un-hin-Deva-Latn-2016.yaml +222 -0
- data/maps/un-mar-Deva-Latn-2016.yaml +91 -0
- data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
- data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
- data/maps/un-nep-Deva-Latn-1972.yaml +350 -0
- data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
- data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
- data/maps/un-ukr-Cyrl-Latn-1998.yaml +53 -0
- data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
- data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
- data/maps/var-kor-Hang-Hang-jamo.yaml +11193 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
- data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +36 -0
- data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
- data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
- data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
- data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
- data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
- data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
- data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
- data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
- data/maps/var-zho-Hani-Latn-wd-1979.yaml +38912 -0
- data/spec/interscript/filenames_spec.rb +384 -0
- data/spec/interscript/mapping_spec.rb +42 -0
- data/spec/interscript_spec.rb +23 -5
- data/spec/spec_helper.rb +3 -1
- metadata +364 -34
- data/bin/interscript +0 -20
- data/bin/rspec +0 -29
- data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
- data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
- data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
- data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
- data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
- data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
- data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
- data/maps/un-mon-Mong-Latn-2013.yaml +0 -80
@@ -0,0 +1,225 @@
|
|
1
|
+
---
|
2
|
+
authority_id: mvd
|
3
|
+
id: 2008
|
4
|
+
language: iso-639-2:bel
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: About approval of the Instructions for transliteration of surnames and proper names of citizens of the Republic of Belarus when their personal data is included in the population register
|
8
|
+
url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
|
9
|
+
creation_date: 2008
|
10
|
+
|
11
|
+
description: |
|
12
|
+
RESOLUTION OF THE MINISTRY OF INTERNAL AFFAIRS OF THE REPUBLIC OF BELARUS
|
13
|
+
October 9, 2008, No. 288
|
14
|
+
8/19678 (10.23.2008)
|
15
|
+
8/19678 On approval of the Instructions for transliteration of surnames and proper names of citizens of
|
16
|
+
the Republic of Belarus when their personal data is included in the population register
|
17
|
+
|
18
|
+
notes:
|
19
|
+
# Original notes
|
20
|
+
# - |
|
21
|
+
# Инструкция по транслитерации фамилий и собственных имен граждан Республики Беларусь при включении
|
22
|
+
# их персональных данных в регистр населения устанавливает правила передачи с белорусской либо русской формы
|
23
|
+
# написания на латиницу при включении фамилий и собственных имен граждан Республики Беларусь в регистр населения.
|
24
|
+
# - |
|
25
|
+
# Передача фамилий и собственных имен граждан Республики Беларусь на латиницу осуществляется с их правильного
|
26
|
+
# написания на белорусском либо русском языке, засвидетельствованного документами, удостоверяющими личность.
|
27
|
+
# - Передача фамилий и собственных имен граждан Республики Беларусь осуществляется путем транслитерации
|
28
|
+
# литер (букв, знаков) белорусского либо русского написания соответствующими литерами латиницы.
|
29
|
+
# - Транслитерацией достигается общность и унифицированность системы латинизированного написания,
|
30
|
+
# позволяющей пользоваться ею во всех латинопишущих государствах.
|
31
|
+
# - Смягчение согласной буквы, обозначенное мягким знаком, в белорусской латинице следует показывать
|
32
|
+
# диакритическим знаком (́), который располагается над соответствующей буквой: дзь – dź, зь – ź, ль – ĺ,
|
33
|
+
# нь – ń, сь – ś, ць – ć.
|
34
|
+
# - Правила транслитерации букв белорусского и русского алфавитов соответствующими буквами латиницы
|
35
|
+
# приведены в таблице транслитерации букв белорусского и русского алфавитов буквами латиницы согласно
|
36
|
+
# приложению к настоящей Инструкции.
|
37
|
+
# - Сложные и составные фамилии и собственные имена, пишущиеся слитно, раздельно или через дефис,
|
38
|
+
# сохраняют слитное, раздельное или дефисное написание и в латинице.
|
39
|
+
- | # 1
|
40
|
+
Instructions for transliterating the names and first names of citizens of the Republic of Belarus
|
41
|
+
when including their personal data in the population register sets the rules for transferring
|
42
|
+
from the Belarusian or Russian form of writing in Latin when including the names and first names
|
43
|
+
of citizens of the Republic of Belarus in the population register.
|
44
|
+
- | # 2
|
45
|
+
Transfer of surnames and proper names of citizens of the Republic of Belarus to the Latin alphabet
|
46
|
+
is carried out from their correct spelling in Belarusian or Russian, as attested by identity documents.
|
47
|
+
- | # 3
|
48
|
+
The transfer of surnames and proper names of citizens of the Republic of Belarus is carried out by
|
49
|
+
transliteration of the letters (letters, signs) of the Belarusian or Russian spelling in the corresponding
|
50
|
+
Latin letters.
|
51
|
+
- | # 4
|
52
|
+
Transliteration achieves the generality and unification of the system of Latinized writing,
|
53
|
+
which allows it to be used in all Latin-writing countries.
|
54
|
+
- | # 5
|
55
|
+
The softening of the consonant, indicated by a soft sign, in the Belarusian Latin should be shown
|
56
|
+
with a diacritic mark (́), which is located above the corresponding letter:
|
57
|
+
дзь - dź,
|
58
|
+
зь - ź,
|
59
|
+
ль - ĺ,
|
60
|
+
нь - ń,
|
61
|
+
сь - ś,
|
62
|
+
ць - ć.
|
63
|
+
# 6
|
64
|
+
- The rules for transliterating letters of the Belarusian and Russian alphabets with the corresponding
|
65
|
+
letters of the Latin alphabet are given in the table of transliteration of letters of the Belarusian
|
66
|
+
and Russian alphabets with the Latin letters according to the appendix to this Instruction.
|
67
|
+
# 7
|
68
|
+
- Complex and compound surnames and proper names, written as one word, separately or with a hyphen,
|
69
|
+
keep their joined, separate or hyphenated spelling in Latin script.
|
70
|
+
|
71
|
+
tests:
|
72
|
+
- source: Ева
|
73
|
+
expected: Jeva
|
74
|
+
- source: Васiльева
|
75
|
+
expected: Vasiĺjeva
|
76
|
+
- source: Васiлёнак
|
77
|
+
expected: Vasilionak
|
78
|
+
- source: Ёрш
|
79
|
+
expected: Jorsh
|
80
|
+
- source: Вераб’ёў
|
81
|
+
expected: Vierabjow
|
82
|
+
- source: Салаўёва
|
83
|
+
expected: Salawjova
|
84
|
+
- source: Любоў
|
85
|
+
expected: Liubow
|
86
|
+
- source: В’юноў
|
87
|
+
expected: Vjunow
|
88
|
+
- source: Чарняк
|
89
|
+
expected: Charniak
|
90
|
+
- source: Лябецкая
|
91
|
+
expected: Liabietskaja # in reference doc it's Liabetskaja CAMOBAP waiting confirmation from officials
|
92
|
+
- source: Дар’я
|
93
|
+
expected: Darja
|
94
|
+
|
95
|
+
map:
|
96
|
+
rules:
|
97
|
+
- pattern: (\u2019\u0415) # Е
|
98
|
+
result: Je
|
99
|
+
- pattern: (\u2019\u0435) # е
|
100
|
+
result: je
|
101
|
+
- pattern: (\u2019\u0401) # Ë
|
102
|
+
result: Jo
|
103
|
+
- pattern: (\u2019\u0451) # ё
|
104
|
+
result: jo
|
105
|
+
- pattern: (\u2019\u042E) # Ю
|
106
|
+
result: Ju
|
107
|
+
- pattern: (\u2019\u044E) # ю
|
108
|
+
result: ju
|
109
|
+
- pattern: (\u2019\u042F) # Я
|
110
|
+
result: Ja
|
111
|
+
- pattern: (\u2019\u044F) # я
|
112
|
+
result: ja
|
113
|
+
|
114
|
+
- pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u0415 # Е after vowels
|
115
|
+
result: Je
|
116
|
+
- pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u0435 # е after vowels
|
117
|
+
result: je
|
118
|
+
- pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u0401 # Ё after vowels
|
119
|
+
result: Jo
|
120
|
+
- pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u0451 # ё after vowels
|
121
|
+
result: jo
|
122
|
+
- pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u042E # Ю after vowels
|
123
|
+
result: Ju
|
124
|
+
- pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u044E # ю after vowels
|
125
|
+
result: ju
|
126
|
+
- pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u042F # Я after vowels
|
127
|
+
result: Ja
|
128
|
+
- pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u044F # я after vowels
|
129
|
+
result: ja
|
130
|
+
|
131
|
+
# note[5]
|
132
|
+
- pattern: (?<=[ЗзЛлНнСсЦц])\u044C # ь after consonants
|
133
|
+
result: "\\1\u0301"
|
134
|
+
- pattern: (?<=[ЗзЛлНнСсЦц])\u02B9 # Ь after consonants
|
135
|
+
result: "\\1\u0301"
|
136
|
+
|
137
|
+
# vowels initially
|
138
|
+
- pattern: \b\u0415 # Е
|
139
|
+
result: Je
|
140
|
+
- pattern: \b\u0435 # е
|
141
|
+
result: je
|
142
|
+
- pattern: \b\u0401 # Ё
|
143
|
+
result: Jo
|
144
|
+
- pattern: \b\u0451 # ё
|
145
|
+
result: jo
|
146
|
+
- pattern: \b\u042E # Ю
|
147
|
+
result: Ju
|
148
|
+
- pattern: \b\u044E # ю
|
149
|
+
result: ju
|
150
|
+
- pattern: \b\u042F # Я
|
151
|
+
result: Ja
|
152
|
+
- pattern: \b\u044F # я
|
153
|
+
result: ja
|
154
|
+
|
155
|
+
postrules:
|
156
|
+
- pattern: \u2019
|
157
|
+
result: j
|
158
|
+
|
159
|
+
characters:
|
160
|
+
|
161
|
+
'\u0410' : 'A' # А
|
162
|
+
'\u0411' : 'B' # Б
|
163
|
+
'\u0412' : 'V' # B
|
164
|
+
'\u0413' : 'G' # Г
|
165
|
+
'\u0414' : 'D' # Д
|
166
|
+
'\u0415' : 'Ie' # Е or JE TODO add rule
|
167
|
+
'\u0401' : 'Io' # Ё or JO TODO add rule
|
168
|
+
'\u0416' : 'Zh' # Ж
|
169
|
+
'\u0417' : 'Z' # З
|
170
|
+
'\u0406' : 'I' # І
|
171
|
+
'\u0419' : "J" # Й
|
172
|
+
'\u041A' : 'K' # К
|
173
|
+
'\u041B' : 'L' # Л
|
174
|
+
'\u041C' : 'M' # М
|
175
|
+
'\u041D' : 'N' # Н
|
176
|
+
'\u041E' : 'O' # О
|
177
|
+
'\u041F' : 'P' # П
|
178
|
+
'\u0420' : 'R' # Р
|
179
|
+
'\u0421' : 'S' # С
|
180
|
+
'\u0422' : 'T' # Т
|
181
|
+
'\u0423' : 'U' # У
|
182
|
+
'\u040E' : 'W' # Ў
|
183
|
+
'\u0424' : 'F' # Ф
|
184
|
+
'\u0425' : 'Kh' # Х
|
185
|
+
'\u0426' : 'Ts' # Ц
|
186
|
+
'\u0427' : 'Ch' # Ч
|
187
|
+
'\u0428' : 'Sh' # Ш
|
188
|
+
'\u0429' : 'Shch' # Щ
|
189
|
+
'\u042B' : 'Y' # Ы
|
190
|
+
'\u042D' : 'E' # Э
|
191
|
+
'\u042E' : "Iu" # Ю
|
192
|
+
'\u042F' : "Ia" # Я
|
193
|
+
|
194
|
+
'\u0430' : 'a' # а
|
195
|
+
'\u0431' : 'b' # б
|
196
|
+
'\u0432' : 'v' # в
|
197
|
+
'\u0433' : 'g' # г
|
198
|
+
'\u0434' : 'd' # д
|
199
|
+
'\u0435' : 'ie' # е
|
200
|
+
'\u0451' : 'io' # ё
|
201
|
+
'\u0436' : 'zh' # ж
|
202
|
+
'\u0437' : 'z' # з
|
203
|
+
'\u0456' : 'i' # і
|
204
|
+
'\u0439' : 'j' # й
|
205
|
+
'\u043A' : 'k' # к
|
206
|
+
'\u043B' : 'l' # л
|
207
|
+
'\u043C' : 'm' # м
|
208
|
+
'\u043D' : 'n' # н
|
209
|
+
'\u043E' : 'o' # о
|
210
|
+
'\u043F' : 'p' # п
|
211
|
+
'\u0440' : 'r' # р
|
212
|
+
'\u0441' : 's' # с
|
213
|
+
'\u0442' : 't' # т
|
214
|
+
'\u0443' : 'u' # у
|
215
|
+
'\u045E' : 'w' # ў
|
216
|
+
'\u0444' : 'f' # ф
|
217
|
+
'\u0445' : 'kh' # х
|
218
|
+
'\u0446' : 'ts' # ц
|
219
|
+
'\u0447' : 'ch' # ч
|
220
|
+
'\u0448' : 'sh' # ш
|
221
|
+
'\u0449' : 'shch' # щ
|
222
|
+
'\u044B' : 'y' # ы
|
223
|
+
'\u044D' : 'e' # э
|
224
|
+
'\u044E' : "iu" # ю
|
225
|
+
'\u044F' : "ia" # я
|
@@ -0,0 +1,63 @@
|
|
1
|
+
---
|
2
|
+
authority_id: mvd
|
3
|
+
id: 2010
|
4
|
+
language: iso-639-2:bel
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: |
|
8
|
+
8/22721 About approval of the Instructions on the organization of work of units of citizenship
|
9
|
+
and migration of internal affairs bodies on the issuance, registration, exchange,
|
10
|
+
invalidation, seizure, storage and destruction of a passport of a citizen of the Republic of Belarus
|
11
|
+
url: https://pravo.by/document/?guid=3871&p0=W21022721
|
12
|
+
creation_date: 2010
|
13
|
+
|
14
|
+
description: |
|
15
|
+
RESOLUTION OF THE MINISTRY OF INTERNAL AFFAIRS OF THE REPUBLIC OF BELARUS
|
16
|
+
June 28, 2010 No. 200
|
17
|
+
On approval of the Instructions on the organization of work of units of citizenship
|
18
|
+
and migration of internal affairs bodies on the issuance, registration, exchange,
|
19
|
+
invalidation, seizure, storage and destruction of a passport of a citizen of the Republic of Belarus
|
20
|
+
|
21
|
+
notes:
|
22
|
+
- |
|
23
|
+
It is not allowed to use in the spelling of the surname, own name superscripts, punctuation, except
|
24
|
+
for the use of the apostrophe in Belarusian and Latin spelling.
|
25
|
+
- The Belarusian letter "Г" is written as the Latin "H"
|
26
|
+
|
27
|
+
tests:
|
28
|
+
- source: Бабрыковіч Аляксандр
|
29
|
+
expected: Babrykovich Aliaksandr
|
30
|
+
- source: Міховіч Марыя
|
31
|
+
expected: Mikhovich Maryia
|
32
|
+
- source: Максім
|
33
|
+
expected: Maksim
|
34
|
+
- source: Іван
|
35
|
+
expected: Ivan
|
36
|
+
- source: СВЯТЛАНА
|
37
|
+
expected: SVIATLANA
|
38
|
+
- source: Ігар
|
39
|
+
expected: Ihar
|
40
|
+
- source: МІХАІЛ
|
41
|
+
expected: MIKHAIL
|
42
|
+
|
43
|
+
map:
|
44
|
+
inherit: "mvd-bel-Cyrl-Latn-2008"
|
45
|
+
|
46
|
+
rules:
|
47
|
+
# note[5]
|
48
|
+
- pattern: (?<=[ЗзЛлНнСсЦц])\u044C # ь after consonants
|
49
|
+
result: "\\1"
|
50
|
+
- pattern: (?<=[ЗзЛлНнСсЦц])\u02B9 # Ь after consonants
|
51
|
+
result: "\\1"
|
52
|
+
# Й at end
|
53
|
+
- pattern: (?<=[ЕеЁёЫыЮюЯя])\u0419$ # Й at end, after vowels
|
54
|
+
result: ""
|
55
|
+
- pattern: (?<=[ЕеЁёЫыЮюЯя])\u0439$ # й at end, after vowels
|
56
|
+
result: ""
|
57
|
+
|
58
|
+
characters:
|
59
|
+
'\u0413' : 'H' # Г
|
60
|
+
'\u0433' : 'h' # г
|
61
|
+
|
62
|
+
'\u042C' : '' # Ь
|
63
|
+
'\u044C' : '' # ь
|
@@ -0,0 +1,109 @@
|
|
1
|
+
---
|
2
|
+
authority_id: mvd
|
3
|
+
id: 2008
|
4
|
+
language: iso-639-2:rus
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: 8/19678 On approval of the Instructions for transliteration of surnames and proper names of citizens of the Republic of Belarus when their personal data is included in the population register
|
8
|
+
url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
|
9
|
+
creation_date: 2008
|
10
|
+
|
11
|
+
notes:
|
12
|
+
- check notes from mvd-bel-Cyrl-Latn-2008
|
13
|
+
|
14
|
+
tests:
|
15
|
+
- source: Ева
|
16
|
+
expected: Eva
|
17
|
+
- source: Васiльева
|
18
|
+
expected: Vasiĺeva
|
19
|
+
- source: Адъютантов
|
20
|
+
expected: Adjutantov
|
21
|
+
|
22
|
+
map:
|
23
|
+
rules:
|
24
|
+
# note[5]
|
25
|
+
- pattern: (?<=[ЗзЛлНнСсЦц])\u044C # ь after consonants
|
26
|
+
result: "\\1\u0301"
|
27
|
+
- pattern: (?<=[ЗзЛлНнСсЦц])\u02B9 # Ь after consonants
|
28
|
+
result: "\\1\u0301"
|
29
|
+
- pattern: ([’Ъъ]\u042E)
|
30
|
+
result: Ju
|
31
|
+
- pattern: ([’Ъъ]\u044E)
|
32
|
+
result: ju
|
33
|
+
- pattern: ([’Ъъ]\u042F)
|
34
|
+
result: Ja
|
35
|
+
- pattern: ([’Ъъ]\u044F)
|
36
|
+
result: ja
|
37
|
+
|
38
|
+
characters:
|
39
|
+
'’' : 'j'
|
40
|
+
|
41
|
+
'\u0410' : 'A' # А
|
42
|
+
'\u0411' : 'B' # Б
|
43
|
+
'\u0412' : 'V' # B
|
44
|
+
'\u0413' : 'G' # Г
|
45
|
+
'\u0414' : 'D' # Д
|
46
|
+
'\u0415' : 'E' # Е
|
47
|
+
'\u0401' : 'E' # Ё
|
48
|
+
'\u0416' : 'Zh' # Ж
|
49
|
+
'\u0417' : 'Z' # З
|
50
|
+
'\u0406' : 'I' # І
|
51
|
+
'\u0419' : "J" # Й
|
52
|
+
'\u041A' : 'K' # К
|
53
|
+
'\u041B' : 'L' # Л
|
54
|
+
'\u041C' : 'M' # М
|
55
|
+
'\u041D' : 'N' # Н
|
56
|
+
'\u041E' : 'O' # О
|
57
|
+
'\u041F' : 'P' # П
|
58
|
+
'\u0420' : 'R' # Р
|
59
|
+
'\u0421' : 'S' # С
|
60
|
+
'\u0422' : 'T' # Т
|
61
|
+
'\u0423' : 'U' # У
|
62
|
+
'\u040E' : 'W' # Ў
|
63
|
+
'\u0424' : 'F' # Ф
|
64
|
+
'\u0425' : 'Kh' # Х
|
65
|
+
'\u0426' : 'Ts' # Ц
|
66
|
+
'\u0427' : 'Ch' # Ч
|
67
|
+
'\u0428' : 'Sh' # Ш
|
68
|
+
'\u0429' : 'Shch' # Щ
|
69
|
+
'\u042A' : 'J' # Ъ
|
70
|
+
'\u042B' : 'Y' # Ы
|
71
|
+
'\u042C' : '' # Ь
|
72
|
+
'\u042D' : 'E' # Э
|
73
|
+
'\u042E' : 'Iu' # Ю
|
74
|
+
'\u042F' : 'Ia' # Я
|
75
|
+
|
76
|
+
'\u0430' : 'a' # а
|
77
|
+
'\u0431' : 'b' # б
|
78
|
+
'\u0432' : 'v' # в
|
79
|
+
'\u0433' : 'g' # г
|
80
|
+
'\u0434' : 'd' # д
|
81
|
+
'\u0435' : 'e' # е
|
82
|
+
'\u0451' : 'e' # ё
|
83
|
+
'\u0436' : 'zh' # ж
|
84
|
+
'\u0437' : 'z' # з
|
85
|
+
'\u0456' : 'i' # і
|
86
|
+
'\u0439' : 'j' # й
|
87
|
+
'\u043A' : 'k' # к
|
88
|
+
'\u043B' : 'l' # л
|
89
|
+
'\u043C' : 'm' # м
|
90
|
+
'\u043D' : 'n' # н
|
91
|
+
'\u043E' : 'o' # о
|
92
|
+
'\u043F' : 'p' # п
|
93
|
+
'\u0440' : 'r' # р
|
94
|
+
'\u0441' : 's' # с
|
95
|
+
'\u0442' : 't' # т
|
96
|
+
'\u0443' : 'u' # у
|
97
|
+
'\u045E' : 'w' # ў
|
98
|
+
'\u0444' : 'f' # ф
|
99
|
+
'\u0445' : 'kh' # х
|
100
|
+
'\u0446' : 'ts' # ц
|
101
|
+
'\u0447' : 'ch' # ч
|
102
|
+
'\u0448' : 'sh' # ш
|
103
|
+
'\u0449' : 'shch' # щ
|
104
|
+
'\u044A' : 'j' # ъ
|
105
|
+
'\u044B' : 'y' # ы
|
106
|
+
'\u044C' : '' # ь
|
107
|
+
'\u044D' : 'e' # э
|
108
|
+
'\u044E' : 'iu' # ю
|
109
|
+
'\u044F' : 'ia' # я
|
@@ -0,0 +1,37 @@
|
|
1
|
+
---
|
2
|
+
authority_id: mvd
|
3
|
+
id: 2010
|
4
|
+
language: iso-639-2:rus
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: |
|
8
|
+
8/22721 On approval of the Instructions on the organization of work of units of citizenship
|
9
|
+
and migration of internal affairs bodies on the issuance, registration, exchange,
|
10
|
+
invalidation, seizure, storage and destruction of a passport of a citizen of the Republic of Belarus
|
11
|
+
url: https://pravo.by/document/?guid=3871&p0=W21022721
|
12
|
+
creation_date: 2010
|
13
|
+
|
14
|
+
description: |
|
15
|
+
RESOLUTION OF THE MINISTRY OF INTERNAL AFFAIRS OF THE REPUBLIC OF BELARUS
|
16
|
+
June 28, 2010 No. 200
|
17
|
+
On approval of the Instructions on the organization of work of units of citizenship
|
18
|
+
and migration of internal affairs bodies on the issuance, registration, exchange,
|
19
|
+
invalidation, seizure, storage and destruction of a passport of a citizen of the Republic of Belarus
|
20
|
+
|
21
|
+
notes:
|
22
|
+
- check notes from mvd-rus-Cyrl-Latn-2008
|
23
|
+
|
24
|
+
tests:
|
25
|
+
- source: Ева
|
26
|
+
expected: Eva
|
27
|
+
- source: Васiльева
|
28
|
+
expected: Vasileva
|
29
|
+
- source: Адъютантов
|
30
|
+
expected: Adjutantov
|
31
|
+
|
32
|
+
map:
|
33
|
+
inherit: "mvd-rus-Cyrl-Latn-2008"
|
34
|
+
|
35
|
+
postrules:
|
36
|
+
- pattern: \u0301 # remove diacritics
|
37
|
+
result: ""
|
@@ -0,0 +1,425 @@
|
|
1
|
+
---
|
2
|
+
authority_id: ungegn
|
3
|
+
id: 2017
|
4
|
+
language: ics-630-01:ara
|
5
|
+
source_script: Arab
|
6
|
+
destination_script: Latn
|
7
|
+
name: ROMANIZATION OF ARABIC -- UNGEGN 2017 System
|
8
|
+
url: https://github.com/interscript/ics-630-01/blob/master/reference-docs/ANNEX%20A%20-%20Arabic_Personal_Names_FLTS%20(U).pdf
|
9
|
+
creation_date: 2017
|
10
|
+
confirmation date: 2018-06
|
11
|
+
description: |
|
12
|
+
This system, adapted from the Board on Geographic Names, is
|
13
|
+
the Intelligence Community (IC) standard for the
|
14
|
+
transliteration of Arabic names that will be applied to all
|
15
|
+
final written reports and products for IC consumers. It is
|
16
|
+
not intended to eliminate variations of a name that can
|
17
|
+
contribute forensic information. Rather, it is to provide
|
18
|
+
an IC standard Romanized (English) transliteration from
|
19
|
+
modern standard Arabic that can then be linked to forensic
|
20
|
+
information in ways that will help identify the referent of
|
21
|
+
the name. Ambiguities can result from the Romanization of
|
22
|
+
Arabic names because the Arabic source generally omits
|
23
|
+
short vowel markings, double consonant marks, and other
|
24
|
+
diacritics that would clearly distinguish the name.
|
25
|
+
Linguists use their experience with the language and aids
|
26
|
+
such as on-line tools and name dictionaries to determine
|
27
|
+
the exact Arabic and the appropriate transliteration into
|
28
|
+
the Roman alphabet. In cases where an individual's name has
|
29
|
+
already been transliterated, that is to be indicated -- as
|
30
|
+
found -- in parentheses immediately following its rendition
|
31
|
+
in the transliteration standard (e.g., Muhammad Khulud (
|
32
|
+
Mohamed Khulood)). In addition, if the original Arabic-
|
33
|
+
script spelling is known, that spelling should also appear
|
34
|
+
in parentheses following the name, if possible, following
|
35
|
+
best practices of the issuing organization and taking into
|
36
|
+
consideration information system capabilities. This
|
37
|
+
convention is designed to ensure that vital forensic
|
38
|
+
information is not lost. For names of persons who are known
|
39
|
+
to not be part of the Arabic-speaking community, use the
|
40
|
+
relevant IC transliteration standard for names from that
|
41
|
+
language (e.g., Mikhail, Yitzhak). A translator’s note may
|
42
|
+
be used to clarify the known origin of the person. Spell
|
43
|
+
names of individuals from languages that are written in
|
44
|
+
Roman letters as they are spelled in those languages (e.g.,
|
45
|
+
George Clooney, Jorge Garcia, Georges Pompidou). In the
|
46
|
+
case of active senior government officials in the on-line
|
47
|
+
CIA World Factbook and the online directory of Chiefs of
|
48
|
+
State and Cabinet Members of Foreign Governments, the
|
49
|
+
spellings given in these on-line reference works should be
|
50
|
+
used in place of the IC Standard. For any individual who
|
51
|
+
has at one time been listed in the Factbook or Chiefs of
|
52
|
+
State directory but who no longer appears in those
|
53
|
+
resources (i.e. is no longer a government official), the IC
|
54
|
+
Standard spelling should appear first, with the spelling,
|
55
|
+
if known, as it previously appeared in those resources
|
56
|
+
listed within parentheses at the first usage. The primary
|
57
|
+
goal of this system is to produce a consistent Romanized
|
58
|
+
transcription of the name that is readable to the non-
|
59
|
+
specialist. The system uses the 26 letters of the standard (
|
60
|
+
English) Roman alphabet plus the apostrophe. Some
|
61
|
+
ambiguities in the Romanized form will occur without the
|
62
|
+
use of diacritics. However, within the context of a report,
|
63
|
+
where additional information about the individual is
|
64
|
+
provided, the referent will be clearly identified. This
|
65
|
+
system will be used in conjunction with on-line tools, name
|
66
|
+
dictionaries, and lists containing conventional spellings
|
67
|
+
of names of well-known individuals.
|
68
|
+
notes: |
|
69
|
+
- Long/Short Vowels: Long and short vowels are not
|
70
|
+
distinguished in this system: Samir (could be Saamir or
|
71
|
+
Samiir in Arabic).
|
72
|
+
|
73
|
+
- Double consonants: Double consonants represented by the
|
74
|
+
Arabic shaddah are shown in most cases (e.g., Hassan,
|
75
|
+
Muhammad). Exceptions: ’ayn and consonants represented by
|
76
|
+
digraphs are not doubled (e.g., al-Qadhafi [not
|
77
|
+
al-Qadhdhafi], Mubashir [not Mubashshir]).
|
78
|
+
|
79
|
+
- Hamzah (glottal stop): The hamzah is represented by an
|
80
|
+
apostrophe (’). Note that this is the same symbol used to
|
81
|
+
represent another consonant, the ’ayn.
|
82
|
+
|
83
|
+
- Ta’ marbutah (feminine ending marker): In the construct
|
84
|
+
form or when pronounced “t”, it is represented with a roman
|
85
|
+
t. In all other cases, it is represented with an h.
|
86
|
+
|
87
|
+
- Digraphs: No distinction is made between digraphs such as
|
88
|
+
sh and single contiguous letters (e.g., s followed by h).
|
89
|
+
|
90
|
+
- Definite article “al” (‘the’): Follows Arabic spelling
|
91
|
+
rather than pronunciation. That is, sun letter assimilation
|
92
|
+
is not shown in the Romanized form (e.g., ’Abd-al-Rahman,
|
93
|
+
not ’Abd-ar-Rahman).
|
94
|
+
|
95
|
+
- Diphthongs: the second element of the diphthong is
|
96
|
+
represented by a y or a w (rather than an i or a u):
|
97
|
+
Haytham, Faysal, Tawfiq, Rawdah.
|
98
|
+
|
99
|
+
- Hyphens: Hyphens (-) are used to connect name elements
|
100
|
+
within a name: ’Abd-al-Rahman, Abu-al-Bashar, Bin-Ladin.
|
101
|
+
Exceptions: Names that incorporate “Allah” as part of the
|
102
|
+
name (e.g., ’Abdallah, Nasrallah), names marked by the
|
103
|
+
lineage/family marker “Al” (e.g., Al Thani) are not
|
104
|
+
hyphenated.
|
105
|
+
|
106
|
+
- The definite article, “al”, within name phrases, is
|
107
|
+
Romanized as al and not as ul: Nur-al-Din (not Nur-ul-Din).
|
108
|
+
It is not capitalized when name-initial.
|
109
|
+
|
110
|
+
- Names that incorporate Allah as part of the name retain the
|
111
|
+
a of Allah rather than a grammatical marker u: ’Abdallah (
|
112
|
+
not ’Abdullah).
|
113
|
+
|
114
|
+
- Foreign names borrowed or appearing in Arabic are spelled
|
115
|
+
according to the standard Western tradition: Georges,
|
116
|
+
Michel. However, names of non-Arabic origin no longer
|
117
|
+
considered foreign by Arabic speakers follow the IC
|
118
|
+
conventions: Butrus (not Peter).
|
119
|
+
|
120
|
+
- Prefix بن (bin ‘son of’) is Romanized Bin unless written
|
121
|
+
with an alif, in which case it is Romanized as Ibn. The
|
122
|
+
colloquial form Bu (‘father’) should not be standardized as
|
123
|
+
Abu. These prefixes are capitalized.
|
124
|
+
|
125
|
+
- In general, Romanization follows the Modern Standard
|
126
|
+
Arabic (MSA) form rather than local pronunciation
|
127
|
+
standards. For example, the letter ج (jim) is represented
|
128
|
+
as a j even when pronounced as a “g” (e.g., Egyptian Gamal
|
129
|
+
is Romanized as Jamal).
|
130
|
+
|
131
|
+
tests:
|
132
|
+
|
133
|
+
- source: مِصر
|
134
|
+
expected: Misr
|
135
|
+
|
136
|
+
- source: قَطَر
|
137
|
+
expected: Qatar
|
138
|
+
|
139
|
+
- source: المَغرِب
|
140
|
+
expected: Al Maghrib
|
141
|
+
|
142
|
+
- source: الجُمهُورِيَّة العِراقِيَّة
|
143
|
+
expected: Al Jumhuriyah al ’Iraqiyah
|
144
|
+
|
145
|
+
- source: جُمهُورِيَّة العِراق
|
146
|
+
expected: Jumhuriyat al ’Iraq
|
147
|
+
|
148
|
+
- source: جُمهُورِيَّة مِصر العَرَبِيَّة
|
149
|
+
expected: Jumhuriyat Misr al ’Arabiyah
|
150
|
+
|
151
|
+
- source: بَغداد
|
152
|
+
expected: Baghdad
|
153
|
+
|
154
|
+
- source: تُونِس
|
155
|
+
expected: Tunis
|
156
|
+
|
157
|
+
- source: حَسّان
|
158
|
+
expected: Hassan
|
159
|
+
|
160
|
+
- source: مُحَمَّد
|
161
|
+
expected: Muhammad
|
162
|
+
|
163
|
+
- source: القَذَّافِي
|
164
|
+
expected: Al Qadhafi
|
165
|
+
|
166
|
+
- source: مُبَشِّر
|
167
|
+
expected: Mubashir
|
168
|
+
|
169
|
+
- source: الجَزائِر
|
170
|
+
expected: Al Jaza’ir
|
171
|
+
|
172
|
+
- source: عَبدالرَحمَن
|
173
|
+
expected: ’Abd-al-Rahman
|
174
|
+
|
175
|
+
- source: هَيْثَم
|
176
|
+
expected: Haytham
|
177
|
+
|
178
|
+
- source: فَيْصَل
|
179
|
+
expected: Faysal
|
180
|
+
|
181
|
+
- source: تَوْفِيق
|
182
|
+
expected: Tawfiq
|
183
|
+
|
184
|
+
- source: رَوْضَة
|
185
|
+
expected: Rawdah
|
186
|
+
|
187
|
+
- source: نُورُالدِين
|
188
|
+
expected: Nur-al-Din
|
189
|
+
|
190
|
+
- source: عَبدُاللَّه
|
191
|
+
expected: ’Abdallah
|
192
|
+
map:
|
193
|
+
postrules:
|
194
|
+
- pattern: (?<=\b)(?<!\b[‘|’|'])[\u0061-\uFFFF]
|
195
|
+
result: "upcase"
|
196
|
+
- pattern: " Al " # ال
|
197
|
+
result: " al "
|
198
|
+
- pattern: "-Al-" # ال
|
199
|
+
result: "-al-"
|
200
|
+
|
201
|
+
# don't capitalize the definite article in the middle of a sentence
|
202
|
+
|
203
|
+
characters:
|
204
|
+
|
205
|
+
# Tool used for Unicode finding:
|
206
|
+
# https://www.branah.com/unicode-converter
|
207
|
+
|
208
|
+
# pointing
|
209
|
+
'\u064e' : 'a' # َ fatha
|
210
|
+
'\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
|
211
|
+
'\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
|
212
|
+
'\u0650' : 'i' # ِ kasra
|
213
|
+
'\u064f' : 'u' # ُ damma
|
214
|
+
'\u0652' : '' # ْ sokoon, see note A below
|
215
|
+
|
216
|
+
|
217
|
+
'\u0650\u064a' : 'i' # ـِي kasra followed by ي
|
218
|
+
'\u0650\u064a\u0651\u064e' : 'iy' # ـِيَّ
|
219
|
+
'\u0650\u064a(?=\u064e|\u064f)' : 'iy' # ـِي kasra followed by ي, before a fatha or damma
|
220
|
+
'\u064f\u0648' : 'u' # ـُو damma followed by و
|
221
|
+
'\u064e\u0627' : 'a' # ـَا fatha followed by ا
|
222
|
+
'\u064e\u0649' : 'á' # ـَى fatha followed by ى which is ا not ي
|
223
|
+
'\u064e\u0648\u0652' : 'aw' # ـَوْ
|
224
|
+
'\u064e\u064a\u0652' : 'ay' # ـَيْ
|
225
|
+
'\u0622' : 'a' # آ
|
226
|
+
|
227
|
+
# ta' marboota
|
228
|
+
'\u0629' : 'at' # ة in the middle of the sentence
|
229
|
+
'\u0629$' : 'ah'
|
230
|
+
'(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
|
231
|
+
'(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
|
232
|
+
'(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
|
233
|
+
'(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
|
234
|
+
'(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
|
235
|
+
'(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
|
236
|
+
'(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
|
237
|
+
'(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
|
238
|
+
'(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
|
239
|
+
'(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
|
240
|
+
'(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
|
241
|
+
'(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
|
242
|
+
|
243
|
+
# shadda
|
244
|
+
|
245
|
+
'\u0628\u0651' : 'bb' # ب
|
246
|
+
'\u062a\u0651' : 'tt' # ت
|
247
|
+
'\u062b\u0651' : 'th' # ث
|
248
|
+
'\u062c\u0651' : 'jj' # ج
|
249
|
+
'\u062d\u0651' : 'hh' # ح
|
250
|
+
'\u062e\u0651' : 'kh' # خ
|
251
|
+
'\u062f\u0651' : 'dd' # د
|
252
|
+
'\u0630\u0651' : 'dh' # ذ
|
253
|
+
'\u0631\u0651' : 'rr' # ر
|
254
|
+
'\u0632\u0651' : 'zz' # ز
|
255
|
+
'\u0633\u0651' : 'ss' # س
|
256
|
+
'\u0634\u0651' : 'sh' # ش
|
257
|
+
'\u0635\u0651' : 'ss' # ص
|
258
|
+
'\u0636\u0651' : 'dd' # ض
|
259
|
+
'\u0637\u0651' : 'tt' # ط
|
260
|
+
'\u0638\u0651' : 'zz' # ظ
|
261
|
+
'\u063a\u0651' : 'gh' # غ
|
262
|
+
'\u0641\u0651' : 'ff' # ف
|
263
|
+
'\u0642\u0651' : 'qq' # ق
|
264
|
+
'\u0643\u0651' : 'kk' # ك
|
265
|
+
'\u0644\u0651' : 'll' # ل
|
266
|
+
'\u0645\u0651' : 'mm' # م
|
267
|
+
'\u0646\u0651' : 'nn' # ن
|
268
|
+
'\u0647\u0651' : 'hh' # ه
|
269
|
+
'\u0648\u0651' : 'ww' # و
|
270
|
+
'\u064a\u0651' : 'yy' # ي
|
271
|
+
|
272
|
+
'\u0626' : "’" # ئ
|
273
|
+
|
274
|
+
'\b\u0627\u0644\u0644\u0651\u064e\u0647': 'Allah'
|
275
|
+
|
276
|
+
'\B\u064f?\u0627\u0644\u0644\u0651\u064e\u0647': 'allah'
|
277
|
+
|
278
|
+
'\u0621' : # ء
|
279
|
+
- '’'
|
280
|
+
- ''
|
281
|
+
|
282
|
+
'\b\u0627\u0644' : 'al ' # ال
|
283
|
+
'\B\u064f?\u0627\u0644' : '-al-' # ال in middle of composite name
|
284
|
+
# '\uFE8E' : '' # ﺎ
|
285
|
+
|
286
|
+
|
287
|
+
'\u0623' : '' # أ
|
288
|
+
'\b\u0627' : '' # ا
|
289
|
+
'\u0627' : 'a' # ا
|
290
|
+
|
291
|
+
'\u0628' : 'b' # ب
|
292
|
+
'\uFE91' : 'b' # ﺑ
|
293
|
+
'\uFE92' : 'b' # ﺒ
|
294
|
+
'\uFE90' : 'b' # ﺐ
|
295
|
+
|
296
|
+
'\u062a' : 't' # ت
|
297
|
+
'\ufe97' : 't' # ﺗ
|
298
|
+
'\ufe98' : 't' # ﺘ
|
299
|
+
'\ufe96' : 't' # ﺖ
|
300
|
+
|
301
|
+
'\u062b' : 'th' # ث
|
302
|
+
'\ufe9b' : 'th' # ﺛ
|
303
|
+
'\ufe9c' : 'th' # ﺜ
|
304
|
+
'\ufe9a' : 'th' # ﺚ
|
305
|
+
|
306
|
+
'\u062c' : 'j' # ج
|
307
|
+
'\ufe9f' : 'j' # ﺟ
|
308
|
+
'\ufea0' : 'j' # ﺠ
|
309
|
+
'\ufe9e' : 'j' # ﺞ
|
310
|
+
|
311
|
+
'\u062d' : 'h' # ح
|
312
|
+
'\ufea3' : 'h' # ﺣ
|
313
|
+
'\ufea4' : 'h' # ﺤ
|
314
|
+
'\ufea2' : 'h' # ﺢ
|
315
|
+
|
316
|
+
'\u062e' : 'kh' # خ
|
317
|
+
'\ufea7' : 'kh' # ﺧ
|
318
|
+
'\ufea8' : 'kh' # ﺨ
|
319
|
+
'\ufea6' : 'kh' # ﺦ
|
320
|
+
|
321
|
+
'\u062f' : 'd' # د
|
322
|
+
'\ufeaa' : 'd' # ﺪ
|
323
|
+
|
324
|
+
'\u0630' : 'dh' # ذ
|
325
|
+
'\ufeac' : 'dh' # ﺬ
|
326
|
+
|
327
|
+
'\u0631' : 'r' # ر
|
328
|
+
'\ufeae' : 'r' # ﺮ
|
329
|
+
|
330
|
+
'\u0632' : 'z' # ز
|
331
|
+
'\ufeb0' : 'z' # ﺰ
|
332
|
+
|
333
|
+
'\u0633' : 's' # س
|
334
|
+
'\ufeb3' : 's' # ﺳ
|
335
|
+
'\ufeb4' : 's' # ﺴ
|
336
|
+
'\ufeb2' : 's' # ﺲ
|
337
|
+
|
338
|
+
'\u0634' : 'sh' # ش
|
339
|
+
'\ufeb7' : 'sh' # ﺷ
|
340
|
+
'\ufeb8' : 'sh' # ﺸ
|
341
|
+
'\ufeb6' : 'sh' # ﺶ
|
342
|
+
|
343
|
+
'\u0635' : 's' # ص
|
344
|
+
'\ufebb' : 's' # ﺻ
|
345
|
+
'\ufebc' : 's' # ﺼ
|
346
|
+
'\ufeba' : 's' # ﺺ
|
347
|
+
|
348
|
+
'\u0636' : 'd' # ض
|
349
|
+
'\ufebf' : 'd' # ﺿ
|
350
|
+
'\ufec0' : 'd' # ﻀ
|
351
|
+
'\ufebe' : 'd' # ﺾ
|
352
|
+
|
353
|
+
'\u0637' : 't' # ط
|
354
|
+
'\ufec3' : 't' # ﻃ
|
355
|
+
'\ufec4' : 't' # ﻄ
|
356
|
+
'\ufec2' : 't' # ﻂ
|
357
|
+
|
358
|
+
'\u0638' : 'z' # ظ
|
359
|
+
'\ufec7' : 'z' # ﻇ
|
360
|
+
'\ufec8' : 'z' # ﻈ
|
361
|
+
'\ufec6' : 'z' # ﻆ
|
362
|
+
|
363
|
+
'\u0639' : '’' # ع
|
364
|
+
'\ufecb' : '’' # ﻋ
|
365
|
+
'\ufecc' : '’' # ﻌ
|
366
|
+
'\ufeca' : '’' # ﻊ
|
367
|
+
|
368
|
+
'\u063a' : 'gh' # غ
|
369
|
+
'\ufecf' : 'gh' # ﻏ
|
370
|
+
'\ufed0' : 'gh' # ﻐ
|
371
|
+
'\ufece' : 'gh' # ﻎ
|
372
|
+
|
373
|
+
'\u0641' : 'f' # ف
|
374
|
+
'\ufed3' : 'f' # ﻓ
|
375
|
+
'\ufed4' : 'f' # ﻔ
|
376
|
+
'\ufed2' : 'f' # ﻒ
|
377
|
+
|
378
|
+
'\u0642' : 'q' # ق
|
379
|
+
'\ufed7' : 'q' # ﻗ
|
380
|
+
'\ufed8' : 'q' # ﻘ
|
381
|
+
'\ufed6' : 'q' # ﻖ
|
382
|
+
|
383
|
+
'\u0643' : 'k' # ك
|
384
|
+
'\ufedb' : 'k' # ﻛ
|
385
|
+
'\ufedc' : 'k' # ﻜ
|
386
|
+
'\ufeda' : 'k' # ﻚ
|
387
|
+
|
388
|
+
'\u0644' : 'l' # ل
|
389
|
+
'\ufedf' : 'l' # ﻟ
|
390
|
+
'\ufee0' : 'l' # ﻠ
|
391
|
+
'\ufede' : 'l' # ﻞ
|
392
|
+
|
393
|
+
'\u0645' : 'm' # م
|
394
|
+
'\ufee3' : 'm' # ﻣ
|
395
|
+
'\ufee4' : 'm' # ﻤ
|
396
|
+
'\ufee2' : 'm' # ﻢ
|
397
|
+
|
398
|
+
'\u0646' : 'n' # ن
|
399
|
+
'\ufee7' : 'n' # ﻧ
|
400
|
+
'\ufee8' : 'n' # ﻨ
|
401
|
+
'\ufee6' : 'n' # ﻦ
|
402
|
+
|
403
|
+
# See note C
|
404
|
+
'\u0647' : 'h' # ه
|
405
|
+
'\ufeeb' : 'h' # ﻫ
|
406
|
+
'\ufeec' : 'h' # ﻬ
|
407
|
+
'\ufeea' : 'h' # ﻪ
|
408
|
+
|
409
|
+
'\u0648' : 'w' # و
|
410
|
+
'\ufeee' : 'w' # ﻮ
|
411
|
+
|
412
|
+
'\u064a' : 'y' # ي
|
413
|
+
'\ufef3' : 'y' # ﻳ
|
414
|
+
'\ufef4' : 'y' # ﻴ
|
415
|
+
'\ufef1' : 'y' # ﻱ
|
416
|
+
|
417
|
+
# (A) Not romanized word-initially.
|
418
|
+
|
419
|
+
# (B) Not romanized, but see romanizations accompanying alif (ا) in the table for vowels.
|
420
|
+
|
421
|
+
# (C) In certain endings, an original tā’ (ت) is written ة, i.e., like hā’ (ه) with two dots, and is known as tā’ marbūṭah. It is romanized h, except in the construct form of feminine nouns, where it is romanized t, instead.
|
422
|
+
|
423
|
+
|
424
|
+
# Vowels, diphthongs and diacritical marks
|
425
|
+
# (ـ stands for any consonant)
|