interscript 0.1.6 → 2.1.0a9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/Gemfile +29 -0
- data/LICENSE.adoc +31 -0
- data/README.md +3 -0
- data/Rakefile +53 -0
- data/bin/console +14 -0
- data/bin/interscript +3 -39
- data/bin/maps_analyze_staging +168 -0
- data/bin/maps_debug_compilers +58 -0
- data/bin/maps_debug_ordering +88 -0
- data/bin/maps_debug_ruby_compile +24 -0
- data/bin/maps_debug_step_by_step +44 -0
- data/bin/maps_optimize_order +112 -0
- data/bin/maps_v1_analyze_regexps +45 -0
- data/bin/maps_v1_to_v2 +426 -0
- data/exe/interscript +6 -0
- data/interscript.gemspec +31 -0
- data/lib/interscript.rb +81 -127
- data/lib/interscript/command.rb +5 -5
- data/lib/interscript/compiler.rb +22 -0
- data/lib/interscript/compiler/javascript.rb +292 -0
- data/lib/interscript/compiler/ruby.rb +262 -0
- data/lib/interscript/dsl.rb +67 -0
- data/lib/interscript/dsl/aliases.rb +23 -0
- data/lib/interscript/dsl/document.rb +46 -0
- data/lib/interscript/dsl/group.rb +45 -0
- data/lib/interscript/dsl/group/parallel.rb +6 -0
- data/lib/interscript/dsl/items.rb +89 -0
- data/lib/interscript/dsl/metadata.rb +26 -0
- data/lib/interscript/dsl/stage.rb +6 -0
- data/lib/interscript/dsl/symbol_mm.rb +11 -0
- data/lib/interscript/dsl/tests.rb +12 -0
- data/lib/interscript/interpreter.rb +251 -0
- data/lib/interscript/node.rb +25 -0
- data/lib/interscript/node/alias_def.rb +15 -0
- data/lib/interscript/node/dependency.rb +13 -0
- data/lib/interscript/node/document.rb +45 -0
- data/lib/interscript/node/group.rb +34 -0
- data/lib/interscript/node/group/parallel.rb +9 -0
- data/lib/interscript/node/group/sequential.rb +2 -0
- data/lib/interscript/node/item.rb +52 -0
- data/lib/interscript/node/item/alias.rb +42 -0
- data/lib/interscript/node/item/any.rb +61 -0
- data/lib/interscript/node/item/capture.rb +50 -0
- data/lib/interscript/node/item/group.rb +51 -0
- data/lib/interscript/node/item/repeat.rb +40 -0
- data/lib/interscript/node/item/stage.rb +23 -0
- data/lib/interscript/node/item/string.rb +51 -0
- data/lib/interscript/node/metadata.rb +18 -0
- data/lib/interscript/node/rule.rb +6 -0
- data/lib/interscript/node/rule/funcall.rb +18 -0
- data/lib/interscript/node/rule/run.rb +15 -0
- data/lib/interscript/node/rule/sub.rb +65 -0
- data/lib/interscript/node/stage.rb +19 -0
- data/lib/interscript/node/tests.rb +15 -0
- data/lib/interscript/stdlib.rb +211 -0
- data/lib/interscript/utils/regexp_converter.rb +283 -0
- data/lib/interscript/version.rb +1 -1
- data/requirements.txt +1 -0
- metadata +75 -339
- data/README.adoc +0 -298
- data/bin/rspec +0 -29
- data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
- data/lib/g2pwrapper.py +0 -34
- data/lib/interscript-opal.rb +0 -2
- data/lib/interscript/fs.rb +0 -71
- data/lib/interscript/mapping.rb +0 -142
- data/lib/interscript/opal.rb +0 -27
- data/lib/interscript/opal/maps.js.erb +0 -10
- data/lib/interscript/opal_map_translate.rb +0 -12
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
- data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -509
- data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
- data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1283
- data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -159
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
- data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -125
- data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
- data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -624
- data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -627
- data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
- data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
- data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
- data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
- data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -170
- data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
- data/maps/alalc-pan-Deva-Latn-1997.yaml +0 -237
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -221
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
- data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
- data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
- data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
- data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
- data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -528
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -592
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
- data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -285
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -701
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -19
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
- data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -200
- data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -92
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -162
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
- data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
- data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
- data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
- data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
- data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +0 -166
- data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
- data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
- data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
- data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
- data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
- data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
- data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
- data/maps/dos-nep-Deva-Latn-1997.yaml +0 -33
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
- data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -88
- data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
- data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
- data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -186
- data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
- data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
- data/maps/icao-per-Arab-Latn-9303.yaml +0 -103
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -609
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -40
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
- data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
- data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
- data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
- data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
- data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
- data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
- data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
- data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
- data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
- data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
- data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
- data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
- data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
- data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
- data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
- data/maps/ses-ara-Arab-Latn-1930.yaml +0 -279
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
- data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
- data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
- data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
- data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
- data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
- data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
- data/maps/un-nep-Deva-Latn-1972.yaml +0 -163
- data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
- data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -30
- data/maps/ungegn-amh-Ethi-Latn-2016.yaml +0 -575
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
- data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
- data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
- data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
- data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
- data/spec/interscript/mapping_spec.rb +0 -42
- data/spec/interscript_spec.rb +0 -26
- data/spec/spec_helper.rb +0 -3
@@ -1,420 +0,0 @@
|
|
1
|
-
---
|
2
|
-
authority_id: ungegn
|
3
|
-
id: 2017
|
4
|
-
language: ara
|
5
|
-
source_script: Arab
|
6
|
-
destination_script: Latn
|
7
|
-
name: ROMANIZATION OF ARABIC -- UNGEGN 2017 System
|
8
|
-
url: http://www.eki.ee/wgrs/rom1_ar.pdf
|
9
|
-
creation_date: 2017
|
10
|
-
confirmation date: 2018-06
|
11
|
-
description: |
|
12
|
-
The current United Nations recommended romanization
|
13
|
-
system was approved in 2017 (resolution XI/3), based on
|
14
|
-
the system adopted by Arabic experts at the conference
|
15
|
-
held in Beirut in 2007, the Unified Arabic
|
16
|
-
Transliteration System, taking into account the
|
17
|
-
practical amendments and corrections carried out and
|
18
|
-
agreed upon by the representatives of the Arabic-
|
19
|
-
speaking countries at the Fourth Arab Conference on
|
20
|
-
Geographical Names, held in Beirut in 2008, and some
|
21
|
-
clarifications and amendments agreed in Riyadh in 2017.
|
22
|
-
Previously, the United Nations had approved a
|
23
|
-
romanization system in 1972 (resolution II/8), based on the
|
24
|
-
system adopted by Arabic experts at the conference
|
25
|
-
held at Beirut in 1971 with the practical amendments carried out
|
26
|
-
and agreed upon by the representatives of the Arabic-speaking
|
27
|
-
countries at their conference. The table was published in volume
|
28
|
-
II of the conference report.
|
29
|
-
In UN resolution XI/3 it is specifically stated that the
|
30
|
-
system was recommended for the “romanization of the
|
31
|
-
geographical names within those Arabic-speaking countries
|
32
|
-
where this system is officially adopted”. There is
|
33
|
-
evidence of its partial implementation in Jordan, Oman and
|
34
|
-
Saudi Arabia. The UNGEGN Working Group on Romanization
|
35
|
-
Systems intends to continue monitoring the UN system’s
|
36
|
-
implementation across Arabic-speaking countries.
|
37
|
-
In some countries there exist local romanization schemes
|
38
|
-
or practices. The geographical names of Algeria, Djibouti,
|
39
|
-
Mauritania, Morocco and Tunisia are generally rendered in
|
40
|
-
the traditional manner which conforms to the principles of
|
41
|
-
the French orthography.
|
42
|
-
The previous UN-approved system is still found in
|
43
|
-
considerable international usage.
|
44
|
-
Arabic is written from right to left. The Arabic script
|
45
|
-
usually omits vowel points and diacritical marks from
|
46
|
-
writing which makes it difficult to obtain uniform results
|
47
|
-
in the romanization of Arabic. It is essential to identify
|
48
|
-
correctly the words which appear in any particular name
|
49
|
-
and to know the standard Arabic-script spelling including
|
50
|
-
the relevant vowels. One must also take into account
|
51
|
-
dialectal and idiosyncratic deviations. The romanization
|
52
|
-
is generally reversible though there may be some ambiguous
|
53
|
-
letter sequences (dh, kh, sh, th) which may also point to
|
54
|
-
combinations of Arabic characters in addition to the
|
55
|
-
respective single characters.
|
56
|
-
notes:
|
57
|
-
- |
|
58
|
-
When the definite article al precedes a word beginning with
|
59
|
-
one of the "sun letters" (t, th, d, dh, r, z, s, sh, s̱, ḏ, ṯ,
|
60
|
-
d͟h, l, n) the l of the definite article is assimilated with
|
61
|
-
the first consonant of the word: الشارقة Ash Shāriqah.
|
62
|
-
- |
|
63
|
-
The definite article is always written with a capital
|
64
|
-
initial: الزيتون Az Zaytūn, البلد Al Balad, منية الضنية Minyat Aḏ
|
65
|
-
Ḏinniyyah.
|
66
|
-
- |
|
67
|
-
Nunation is unlikely to be found in geographical names and
|
68
|
-
the last letter remains silent: جبل = جبلٌ Jabal (not Jabalun).
|
69
|
-
- |
|
70
|
-
In order to disambiguate certain character sequences a
|
71
|
-
middle dot (·) may be used: سهيلة S·haylah (cf. شيلة Shaylah), دهيب
|
72
|
-
D·hayb (cf. ذيب Dhayb), أدهم Ad·ham (cf. أذم Adham).
|
73
|
-
- |
|
74
|
-
ta' marboota should be transliterated to 'ah' if it's in
|
75
|
-
a definite article, or at the end of the sentence
|
76
|
-
otherwise it should be transliterated to 'at'
|
77
|
-
to handle words starting with AL and ending with ta' marboota
|
78
|
-
which is pronounced as "ah" not "at" divided into multiple
|
79
|
-
regex because lookbehind in ruby doesn't support variable length
|
80
|
-
- |
|
81
|
-
مَكّة should be transliterated to makkah, shadda above ك
|
82
|
-
is to double the consonant, same applies to all arabic letters
|
83
|
-
|
84
|
-
tests:
|
85
|
-
|
86
|
-
# Examples taken from:
|
87
|
-
# https://unstats.un.org/unsd/geoinfo/geonames/
|
88
|
-
|
89
|
-
- source: مِصر
|
90
|
-
expected: Mis̱r
|
91
|
-
|
92
|
-
- source: قَطَر
|
93
|
-
expected: Qaṯar
|
94
|
-
|
95
|
-
- source: المَغرِب
|
96
|
-
expected: Al Maghrib
|
97
|
-
|
98
|
-
- source: الجُمهُورِيَّة العِراقِيَّة
|
99
|
-
expected: Al Jumhūrīyah al ‘Irāqīyah
|
100
|
-
|
101
|
-
- source: جُمهُورِيَّة العِراق
|
102
|
-
expected: Jumhūrīyat al ‘Irāq
|
103
|
-
|
104
|
-
- source: جُمهُورِيَّة مِصر العَرَبِيَّة
|
105
|
-
expected: Jumhūrīyat Mis̱r al ‘Arabīyah
|
106
|
-
|
107
|
-
- source: بَغداد
|
108
|
-
expected: Baghdād
|
109
|
-
|
110
|
-
- source: تُونِس
|
111
|
-
expected: Tūnis
|
112
|
-
|
113
|
-
- source: السُعُودِيَّة
|
114
|
-
expected: As Su‘ūdīyah
|
115
|
-
|
116
|
-
- source: اليَمَن
|
117
|
-
expected: Al Yaman
|
118
|
-
|
119
|
-
- source: السُودان
|
120
|
-
expected: As Sūdān
|
121
|
-
|
122
|
-
- source: الجَزائِر
|
123
|
-
expected: Al Jazā'ir
|
124
|
-
|
125
|
-
- source: الجُمهُورِيَّة اللُبنانِيَّة
|
126
|
-
expected: Al Jumhūrīyah al Lubnānīyah
|
127
|
-
|
128
|
-
- source: أسمَرة
|
129
|
-
expected: Asmarah
|
130
|
-
|
131
|
-
- source: جِدَّة
|
132
|
-
expected: Jiddah
|
133
|
-
|
134
|
-
- source: مَكَّة
|
135
|
-
expected: Makkah
|
136
|
-
|
137
|
-
- source: الرِيَاض
|
138
|
-
expected: Ar Riyāḏ
|
139
|
-
|
140
|
-
map:
|
141
|
-
postrules:
|
142
|
-
- pattern: (?<=\b)(?<!\b[‘|’|'])[\u0061-\uFFFF]
|
143
|
-
result: "upcase"
|
144
|
-
# don't capitalize the definite article in the middle of a sentence
|
145
|
-
- pattern : ' At T' # الت
|
146
|
-
result: ' at T'
|
147
|
-
- pattern : ' Ath Th' # الث
|
148
|
-
result: ' ath th'
|
149
|
-
- pattern : ' Ad D' # الد
|
150
|
-
result: ' ad D'
|
151
|
-
- pattern : ' Adh Dh' # الذ
|
152
|
-
result: ' adh Dh'
|
153
|
-
- pattern : ' Ar R' # الر
|
154
|
-
result: ' ar R'
|
155
|
-
- pattern : ' Az Z' # الز
|
156
|
-
result: ' az Z'
|
157
|
-
- pattern : ' As S' # الس
|
158
|
-
result: ' as S'
|
159
|
-
- pattern : ' Ash Sh' # الش
|
160
|
-
result: ' ash Sh'
|
161
|
-
- pattern : ' As̱ S̱' # الص
|
162
|
-
result: ' as̱ S̱'
|
163
|
-
- pattern : ' Aḏ Ḏ' # الض
|
164
|
-
result: ' aḏ Ḏ'
|
165
|
-
- pattern : ' Aṯ Ṯ' # الط
|
166
|
-
result: ' aṯ Ṯ'
|
167
|
-
- pattern : ' Ad͟h D͟h' # الظ
|
168
|
-
result: ' ad͟h D͟h'
|
169
|
-
- pattern : ' Al L' # الل
|
170
|
-
result: ' al L'
|
171
|
-
- pattern : ' an n' # الن
|
172
|
-
result: ' an N'
|
173
|
-
- pattern: " Al " # ال
|
174
|
-
result: " al "
|
175
|
-
|
176
|
-
characters:
|
177
|
-
|
178
|
-
# Tool used for Unicode finding:
|
179
|
-
# https://www.branah.com/unicode-converter
|
180
|
-
|
181
|
-
# pointing
|
182
|
-
'\u064e' : 'a' # َ fatha
|
183
|
-
'\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
|
184
|
-
'\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
|
185
|
-
'\u0650' : 'i' # ِ kasra
|
186
|
-
'\u064f' : 'u' # ُ damma
|
187
|
-
'\u0652' : '' # ْ sokoon, see note A below
|
188
|
-
|
189
|
-
|
190
|
-
# special pointed letters
|
191
|
-
'\u0639\u064e' : '‘a' # عَ
|
192
|
-
'\u0639\u0650' : '‘i' # عِ
|
193
|
-
'\u0639\u064f' : '‘ū' # عُ
|
194
|
-
# handle MacOS regex difference
|
195
|
-
'\u0639\u064f\u0648' : '‘ū' # عُو damma followed by و
|
196
|
-
|
197
|
-
'\u0650\u064a' : 'ī' # ـِي kasra followed by ي
|
198
|
-
'\u0650\u064a\u0651\u064e' : 'īy' # ـِيَّ
|
199
|
-
'\u0650\u064a(?=\u064e|u064f)' : 'iy' # ـِي kasra followed by ي
|
200
|
-
'\u064f\u0648' : 'ū' # ـُو damma followed by و
|
201
|
-
'\u064e\u0627' : 'ā' # ـَا fatha followed by ا
|
202
|
-
'\u064e\u0649' : 'á' # ـَى fatha followed by ى which is ا not ي
|
203
|
-
'\u064e\u0648\u0652' : 'aw' # ـَوْ
|
204
|
-
'\u064e\u064a\u0652' : 'ay' # ـَيْ
|
205
|
-
'\u0622' : 'ā' # آ
|
206
|
-
|
207
|
-
# (A) Marks absence of the vowel.
|
208
|
-
# (B) Marks doubling of the consonant.
|
209
|
-
|
210
|
-
# Sun letters
|
211
|
-
'\b\u0627\u0644\u062a' : 'at t' # الت
|
212
|
-
'\b\u0627\u0644\u062b' : 'ath th' # الث
|
213
|
-
'\b\u0627\u0644\u062f' : 'ad d' # الد
|
214
|
-
'\b\u0627\u0644\u0630' : 'adh dh' # الذ
|
215
|
-
'\b\u0627\u0644\u0631' : 'ar r' # الر
|
216
|
-
'\b\u0627\u0644\u0632' : 'az z' # الز
|
217
|
-
'\b\u0627\u0644\u0633' : 'as s' # الس
|
218
|
-
'\b\u0627\u0644\u0634' : 'ash sh' # الش
|
219
|
-
'\b\u0627\u0644\u0635' : 'as̱ s̱' # الص
|
220
|
-
'\b\u0627\u0644\u0636' : 'aḏ ḏ' # الض
|
221
|
-
'\b\u0627\u0644\u0637' : 'aṯ ṯ' # الط
|
222
|
-
'\b\u0627\u0644\u0638' : 'ad͟h d͟h' # الظ
|
223
|
-
'\b\u0627\u0644\u0644' : 'al l' # الل
|
224
|
-
'\b\u0627\u0644\u0646' : 'an n' # الن
|
225
|
-
|
226
|
-
# ta' marboota
|
227
|
-
'\u0629' : 'at' # ة in the middle of the sentence
|
228
|
-
'\u0629$' : 'ah'
|
229
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
|
230
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
|
231
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
|
232
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
|
233
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
|
234
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
|
235
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
|
236
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
|
237
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
|
238
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
|
239
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
|
240
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
|
241
|
-
|
242
|
-
# shadda
|
243
|
-
|
244
|
-
'\u0628\u0651' : 'bb' # ب
|
245
|
-
'\u062a\u0651' : 'tt' # ت
|
246
|
-
'\u062b\u0651' : 'thth' # ث
|
247
|
-
'\u062c\u0651' : 'jj' # ج
|
248
|
-
'\u062d\u0651' : 'ẖẖ' # ح
|
249
|
-
'\u062e\u0651' : 'khkh' # خ
|
250
|
-
'\u062f\u0651' : 'dd' # د
|
251
|
-
'\u0630\u0651' : 'dhdh' # ذ
|
252
|
-
'\u0631\u0651' : 'rr' # ر
|
253
|
-
'\u0632\u0651' : 'zz' # ز
|
254
|
-
'\u0633\u0651' : 'ss' # س
|
255
|
-
'\u0634\u0651' : 'sh' # ش
|
256
|
-
'\u0635\u0651' : 's̱s̱' # ص
|
257
|
-
'\u0636\u0651' : 'ḏḏ' # ض
|
258
|
-
'\u0637\u0651' : 'ṯṯ' # ط
|
259
|
-
'\u0638\u0651' : 'd͟hd͟h' # ظ
|
260
|
-
'\u063a\u0651' : 'ghgh' # غ
|
261
|
-
'\u0641\u0651' : 'ff' # ف
|
262
|
-
'\u0642\u0651' : 'qq' # ق
|
263
|
-
'\u0643\u0651' : 'kk' # ك
|
264
|
-
'\u0644\u0651' : 'll' # ل
|
265
|
-
'\u0645\u0651' : 'mm' # م
|
266
|
-
'\u0646\u0651' : 'nn' # ن
|
267
|
-
'\u0647\u0651' : 'hh' # ه
|
268
|
-
'\u0648\u0651' : 'ww' # و
|
269
|
-
'\u064a\u0651' : 'yy' # ي
|
270
|
-
|
271
|
-
'\u0626' : "'" # ئ
|
272
|
-
|
273
|
-
|
274
|
-
'\u0621' : # ء
|
275
|
-
- '’'
|
276
|
-
- '' # see note A
|
277
|
-
|
278
|
-
'\u0623' : 'a' # أ
|
279
|
-
'\u0627' : 'ā' # ا
|
280
|
-
|
281
|
-
# See note B
|
282
|
-
'\b\u0627\u0644' : 'al ' # ال
|
283
|
-
# '\uFE8E' : '' # ﺎ
|
284
|
-
|
285
|
-
'\u0628' : 'b' # ب
|
286
|
-
'\uFE91' : 'b' # ﺑ
|
287
|
-
'\uFE92' : 'b' # ﺒ
|
288
|
-
'\uFE90' : 'b' # ﺐ
|
289
|
-
|
290
|
-
# See note C
|
291
|
-
'\u062a' : 't' # ت
|
292
|
-
'\ufe97' : 't' # ﺗ
|
293
|
-
'\ufe98' : 't' # ﺘ
|
294
|
-
'\ufe96' : 't' # ﺖ
|
295
|
-
|
296
|
-
'\u062b' : 'th' # ث
|
297
|
-
'\ufe9b' : 'th' # ﺛ
|
298
|
-
'\ufe9c' : 'th' # ﺜ
|
299
|
-
'\ufe9a' : 'th' # ﺚ
|
300
|
-
|
301
|
-
'\u062c' : 'j' # ج
|
302
|
-
'\ufe9f' : 'j' # ﺟ
|
303
|
-
'\ufea0' : 'j' # ﺠ
|
304
|
-
'\ufe9e' : 'j' # ﺞ
|
305
|
-
|
306
|
-
'\u062d' : 'ẖ' # ح
|
307
|
-
'\ufea3' : 'ẖ' # ﺣ
|
308
|
-
'\ufea4' : 'ẖ' # ﺤ
|
309
|
-
'\ufea2' : 'ẖ' # ﺢ
|
310
|
-
|
311
|
-
'\u062e' : 'kh' # خ
|
312
|
-
'\ufea7' : 'kh' # ﺧ
|
313
|
-
'\ufea8' : 'kh' # ﺨ
|
314
|
-
'\ufea6' : 'kh' # ﺦ
|
315
|
-
|
316
|
-
'\u062f' : 'd' # د
|
317
|
-
'\ufeaa' : 'd' # ﺪ
|
318
|
-
|
319
|
-
'\u0630' : 'dh' # ذ
|
320
|
-
'\ufeac' : 'dh' # ﺬ
|
321
|
-
|
322
|
-
'\u0631' : 'r' # ر
|
323
|
-
'\ufeae' : 'r' # ﺮ
|
324
|
-
|
325
|
-
'\u0632' : 'z' # ز
|
326
|
-
'\ufeb0' : 'z' # ﺰ
|
327
|
-
|
328
|
-
'\u0633' : 's' # س
|
329
|
-
'\ufeb3' : 's' # ﺳ
|
330
|
-
'\ufeb4' : 's' # ﺴ
|
331
|
-
'\ufeb2' : 's' # ﺲ
|
332
|
-
|
333
|
-
'\u0634' : 'sh' # ش
|
334
|
-
'\ufeb7' : 'sh' # ﺷ
|
335
|
-
'\ufeb8' : 'sh' # ﺸ
|
336
|
-
'\ufeb6' : 'sh' # ﺶ
|
337
|
-
|
338
|
-
'\u0635' : 's̱' # ص
|
339
|
-
'\ufebb' : 's̱' # ﺻ
|
340
|
-
'\ufebc' : 's̱' # ﺼ
|
341
|
-
'\ufeba' : 's̱' # ﺺ
|
342
|
-
|
343
|
-
'\u0636' : 'ḏ' # ض
|
344
|
-
'\ufebf' : 'ḏ' # ﺿ
|
345
|
-
'\ufec0' : 'ḏ' # ﻀ
|
346
|
-
'\ufebe' : 'ḏ' # ﺾ
|
347
|
-
|
348
|
-
'\u0637' : 'ṯ' # ط
|
349
|
-
'\ufec3' : 'ṯ' # ﻃ
|
350
|
-
'\ufec4' : 'ṯ' # ﻄ
|
351
|
-
'\ufec2' : 'ṯ' # ﻂ
|
352
|
-
|
353
|
-
'\u0638' : 'd͟h' # ظ
|
354
|
-
'\ufec7' : 'd͟h' # ﻇ
|
355
|
-
'\ufec8' : 'd͟h' # ﻈ
|
356
|
-
'\ufec6' : 'd͟h' # ﻆ
|
357
|
-
|
358
|
-
'\u0639' : '‘' # ع
|
359
|
-
'\ufecb' : '‘' # ﻋ
|
360
|
-
'\ufecc' : '‘' # ﻌ
|
361
|
-
'\ufeca' : '‘' # ﻊ
|
362
|
-
|
363
|
-
'\u063a' : 'gh' # غ
|
364
|
-
'\ufecf' : 'gh' # ﻏ
|
365
|
-
'\ufed0' : 'gh' # ﻐ
|
366
|
-
'\ufece' : 'gh' # ﻎ
|
367
|
-
|
368
|
-
'\u0641' : 'f' # ف
|
369
|
-
'\ufed3' : 'f' # ﻓ
|
370
|
-
'\ufed4' : 'f' # ﻔ
|
371
|
-
'\ufed2' : 'f' # ﻒ
|
372
|
-
|
373
|
-
'\u0642' : 'q' # ق
|
374
|
-
'\ufed7' : 'q' # ﻗ
|
375
|
-
'\ufed8' : 'q' # ﻘ
|
376
|
-
'\ufed6' : 'q' # ﻖ
|
377
|
-
|
378
|
-
'\u0643' : 'k' # ك
|
379
|
-
'\ufedb' : 'k' # ﻛ
|
380
|
-
'\ufedc' : 'k' # ﻜ
|
381
|
-
'\ufeda' : 'k' # ﻚ
|
382
|
-
|
383
|
-
'\u0644' : 'l' # ل
|
384
|
-
'\ufedf' : 'l' # ﻟ
|
385
|
-
'\ufee0' : 'l' # ﻠ
|
386
|
-
'\ufede' : 'l' # ﻞ
|
387
|
-
|
388
|
-
'\u0645' : 'm' # م
|
389
|
-
'\ufee3' : 'm' # ﻣ
|
390
|
-
'\ufee4' : 'm' # ﻤ
|
391
|
-
'\ufee2' : 'm' # ﻢ
|
392
|
-
|
393
|
-
'\u0646' : 'n' # ن
|
394
|
-
'\ufee7' : 'n' # ﻧ
|
395
|
-
'\ufee8' : 'n' # ﻨ
|
396
|
-
'\ufee6' : 'n' # ﻦ
|
397
|
-
|
398
|
-
# See note C
|
399
|
-
'\u0647' : 'h' # ه
|
400
|
-
'\ufeeb' : 'h' # ﻫ
|
401
|
-
'\ufeec' : 'h' # ﻬ
|
402
|
-
'\ufeea' : 'h' # ﻪ
|
403
|
-
|
404
|
-
'\u0648' : 'w' # و
|
405
|
-
'\ufeee' : 'w' # ﻮ
|
406
|
-
|
407
|
-
'\u064a' : 'y' # ي
|
408
|
-
'\ufef3' : 'y' # ﻳ
|
409
|
-
'\ufef4' : 'y' # ﻴ
|
410
|
-
'\ufef1' : 'y' # ﻱ
|
411
|
-
|
412
|
-
# (A) Not romanized word-initially.
|
413
|
-
|
414
|
-
# (B) Not romanized, but see romanizations accompanying alif (ا) in the table for vowels.
|
415
|
-
|
416
|
-
# (C) In certain endings, an original tā’ (ت) is written ة, i.e., like hā’ (ه) with two dots, and is known as tā’ marbūṯah. It is romanized h, except in the construct form of feminine nouns, where it is romanized t, instead.
|
417
|
-
|
418
|
-
|
419
|
-
# Vowels, diphthongs and diacritical marks
|
420
|
-
# (ـ stands for any consonant)
|
@@ -1,114 +0,0 @@
|
|
1
|
-
---
|
2
|
-
authority_id: un
|
3
|
-
id: 2007
|
4
|
-
language: bel
|
5
|
-
source_script: Cyrl
|
6
|
-
destination_script: Latn
|
7
|
-
name: National System of Geographic Names Transmission into Roman Alphabet in Belarus
|
8
|
-
url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/9th-uncsgn-docs/crp/9th_UNCSGN_e-conf-98-crp-21.pdf
|
9
|
-
creation_date: 2007
|
10
|
-
description: |
|
11
|
-
RESOLUTION OF THE STATE COMMITTEE
|
12
|
-
ON PROPERTY OF THE REPUBLIC OF BELARUS June 11, 2007 No. 38
|
13
|
-
|
14
|
-
8/16668 (06/18/2007) On amendments and additions to the Instructions
|
15
|
-
for the transliteration of geographical names of the
|
16
|
-
Republic of Belarus in letters of the Latin alphabet
|
17
|
-
|
18
|
-
Based on the Regulation on the State Property Committee of the Republic of Belarus,
|
19
|
-
approved by the Decree of the Council of Ministers of the Republic of Belarus dated July 29, 2006
|
20
|
-
No. 958 "Issues of the State Committee on Property of the Republic of Belarus"
|
21
|
-
tests: # the same as "by-bel-Cyrl-Latn-2007"
|
22
|
-
- source: Аршанскi
|
23
|
-
expected: Aršanski
|
24
|
-
- source: Бешанковічы
|
25
|
-
expected: Biešankovičy
|
26
|
-
- source: Віцебск
|
27
|
-
expected: Viciebsk
|
28
|
-
- source: Гомель
|
29
|
-
expected: Homieĺ
|
30
|
-
- source: Гаўя
|
31
|
-
expected: Haŭja
|
32
|
-
- source: Добруш
|
33
|
-
expected: Dobruš
|
34
|
-
- source: Ельск
|
35
|
-
expected: Jeĺsk
|
36
|
-
- source: Бабаедава
|
37
|
-
expected: Babajedava
|
38
|
-
- source: Венцавічы
|
39
|
-
expected: Viencavičy
|
40
|
-
- source: Ёды
|
41
|
-
expected: Jody
|
42
|
-
- source: Вераб'ёвічы
|
43
|
-
expected: Vierabjovičy
|
44
|
-
- source: Мёры
|
45
|
-
expected: Miory
|
46
|
-
- source: Зэльва
|
47
|
-
expected: Zeĺva
|
48
|
-
- source: Iванава
|
49
|
-
expected: Ivanava
|
50
|
-
- source: Iўе
|
51
|
-
expected: Iŭje
|
52
|
-
- source: Лагойск
|
53
|
-
expected: Lahojsk
|
54
|
-
- source: Круглае
|
55
|
-
expected: Kruhlaje
|
56
|
-
- source: Лошыца
|
57
|
-
expected: Lošyca
|
58
|
-
- source: Любань
|
59
|
-
expected: Liubań
|
60
|
-
- source: Магілёў
|
61
|
-
expected: Mahilioŭ
|
62
|
-
- source: Нясвіж
|
63
|
-
expected: Niasviž
|
64
|
-
- source: Орша
|
65
|
-
expected: Orša
|
66
|
-
- source: Паставы
|
67
|
-
expected: Pastavy
|
68
|
-
- source: Рагачоў
|
69
|
-
expected: Rahačoŭ
|
70
|
-
- source: Смаргонь
|
71
|
-
expected: Smarhoń
|
72
|
-
- source: Талачын
|
73
|
-
expected: Talačyn
|
74
|
-
- source: Узда
|
75
|
-
expected: Uzda
|
76
|
-
- source: Шаркаўшчына
|
77
|
-
expected: Šarkaŭščyna
|
78
|
-
- source: Фаніпаль
|
79
|
-
expected: Fanipaĺ
|
80
|
-
- source: Хоцімск
|
81
|
-
expected: Chocimsk
|
82
|
-
- source: Цёмны Лес
|
83
|
-
expected: Ciomny Lies
|
84
|
-
- source: Чавусы
|
85
|
-
expected: Čavusy
|
86
|
-
- source: Шумілiна
|
87
|
-
expected: Šumilina
|
88
|
-
- source: Чыгірынка
|
89
|
-
expected: Čyhirynka
|
90
|
-
- source: Чэрвень
|
91
|
-
expected: Červień
|
92
|
-
- source: Друць
|
93
|
-
expected: Druć
|
94
|
-
- source: Чачэрск
|
95
|
-
expected: Čačersk
|
96
|
-
- source: Юхнаўка
|
97
|
-
expected: Juchnaŭka
|
98
|
-
- source: Гаюціна
|
99
|
-
expected: Hajucina
|
100
|
-
- source: Цюрлi
|
101
|
-
expected: Ciurli
|
102
|
-
- source: Любонічы
|
103
|
-
expected: Liuboničy
|
104
|
-
- source: Ямнае
|
105
|
-
expected: Jamnaje
|
106
|
-
- source: Баяры
|
107
|
-
expected: Bajary
|
108
|
-
- source: Валяр'яны
|
109
|
-
expected: Valiarjany
|
110
|
-
- source: Вязынка
|
111
|
-
expected: Viazynka
|
112
|
-
|
113
|
-
map:
|
114
|
-
inherit: "by-bel-Cyrl-Latn-2007"
|