interscript 0.1.5 → 2.1.0a8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/Gemfile +29 -0
- data/LICENSE.adoc +31 -0
- data/README.md +3 -0
- data/Rakefile +53 -0
- data/bin/console +14 -0
- data/bin/interscript +3 -39
- data/bin/maps_analyze_staging +168 -0
- data/bin/maps_debug_compilers +58 -0
- data/bin/maps_debug_ordering +88 -0
- data/bin/maps_debug_ruby_compile +24 -0
- data/bin/maps_debug_step_by_step +44 -0
- data/bin/maps_optimize_order +112 -0
- data/bin/maps_v1_analyze_regexps +45 -0
- data/bin/maps_v1_to_v2 +426 -0
- data/exe/interscript +6 -0
- data/interscript.gemspec +31 -0
- data/lib/interscript.rb +81 -123
- data/lib/interscript/command.rb +5 -5
- data/lib/interscript/compiler.rb +22 -0
- data/lib/interscript/compiler/javascript.rb +292 -0
- data/lib/interscript/compiler/ruby.rb +262 -0
- data/lib/interscript/dsl.rb +67 -0
- data/lib/interscript/dsl/aliases.rb +23 -0
- data/lib/interscript/dsl/document.rb +46 -0
- data/lib/interscript/dsl/group.rb +45 -0
- data/lib/interscript/dsl/group/parallel.rb +6 -0
- data/lib/interscript/dsl/items.rb +89 -0
- data/lib/interscript/dsl/metadata.rb +26 -0
- data/lib/interscript/dsl/stage.rb +6 -0
- data/lib/interscript/dsl/symbol_mm.rb +11 -0
- data/lib/interscript/dsl/tests.rb +12 -0
- data/lib/interscript/interpreter.rb +251 -0
- data/lib/interscript/node.rb +25 -0
- data/lib/interscript/node/alias_def.rb +15 -0
- data/lib/interscript/node/dependency.rb +13 -0
- data/lib/interscript/node/document.rb +45 -0
- data/lib/interscript/node/group.rb +34 -0
- data/lib/interscript/node/group/parallel.rb +9 -0
- data/lib/interscript/node/group/sequential.rb +2 -0
- data/lib/interscript/node/item.rb +52 -0
- data/lib/interscript/node/item/alias.rb +42 -0
- data/lib/interscript/node/item/any.rb +61 -0
- data/lib/interscript/node/item/capture.rb +50 -0
- data/lib/interscript/node/item/group.rb +51 -0
- data/lib/interscript/node/item/repeat.rb +40 -0
- data/lib/interscript/node/item/stage.rb +23 -0
- data/lib/interscript/node/item/string.rb +51 -0
- data/lib/interscript/node/metadata.rb +18 -0
- data/lib/interscript/node/rule.rb +6 -0
- data/lib/interscript/node/rule/funcall.rb +18 -0
- data/lib/interscript/node/rule/run.rb +15 -0
- data/lib/interscript/node/rule/sub.rb +65 -0
- data/lib/interscript/node/stage.rb +19 -0
- data/lib/interscript/node/tests.rb +15 -0
- data/lib/interscript/stdlib.rb +211 -0
- data/lib/interscript/utils/regexp_converter.rb +283 -0
- data/lib/interscript/version.rb +1 -1
- data/requirements.txt +1 -0
- metadata +73 -311
- data/README.adoc +0 -298
- data/bin/rspec +0 -29
- data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
- data/lib/g2pwrapper.py +0 -34
- data/lib/interscript-opal.rb +0 -2
- data/lib/interscript/fs.rb +0 -69
- data/lib/interscript/mapping.rb +0 -142
- data/lib/interscript/opal.rb +0 -23
- data/lib/interscript/opal/maps.js.erb +0 -7
- data/lib/interscript/opal_map_translate.rb +0 -12
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
- data/maps/alalc-bel-cyrl-latn-1997.yaml +0 -125
- data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
- data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -625
- data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -628
- data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -112
- data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
- data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
- data/maps/alalc-mkd-cyrl-latn-1997.yaml +0 -114
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -222
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
- data/maps/alalc-srp-cyrl-latn-2013.yaml +0 -135
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -175
- data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -294
- data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
- data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
- data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +0 -285
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -702
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -20
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -43
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
- data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -93
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -163
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
- data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
- data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
- data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -685
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -681
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -20
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -32
- data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -89
- data/maps/gki-bel-cyrl-latn-1992.yaml +0 -33
- data/maps/gki-bel-cyrl-latn-2000.yaml +0 -201
- data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +0 -186
- data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -141
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -122
- data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
- data/maps/icao-per-Arab-Latn-9303.yaml +0 -104
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -118
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -120
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -610
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -41
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -272
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
- data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
- data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -110
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
- data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
- data/maps/odni-kat-Geor-Latn-2015.yaml +0 -88
- data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
- data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
- data/maps/odni-mkd-cyrl-latn-2015.yaml +0 -122
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
- data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
- data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
- data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -167
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
- data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
- data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
- data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
- data/maps/ses-ara-arab-latn-1930.yaml +0 -275
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
- data/maps/un-ara-Arab-Latn-1971.yaml +0 -127
- data/maps/un-ara-Arab-Latn-1972.yaml +0 -152
- data/maps/un-ara-Arab-Latn-2017.yaml +0 -383
- data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
- data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -32
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -20
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
- data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
- data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
- data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
- data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -37
- data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
- data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
- data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
- data/spec/interscript/mapping_spec.rb +0 -42
- data/spec/interscript_spec.rb +0 -26
- data/spec/spec_helper.rb +0 -3
@@ -1,275 +0,0 @@
|
|
1
|
-
---
|
2
|
-
authority_id: ungegn
|
3
|
-
id: 1930
|
4
|
-
language: ara
|
5
|
-
source_script: Arab
|
6
|
-
destination_script: Latn
|
7
|
-
name: ROMANIZATION OF ARABIC -- UNGEGN 2017 System
|
8
|
-
url: http://www.eki.ee/wgrs/rom1_ar.pdf
|
9
|
-
creation_date: 1930
|
10
|
-
confirmation_date: 2018-06
|
11
|
-
description: |
|
12
|
-
The current United Nations recommended romanization
|
13
|
-
system was approved in 2017 (resolution XI/3), based on
|
14
|
-
the system adopted by Arabic experts at the conference
|
15
|
-
held in Beirut in 2007, the Unified Arabic
|
16
|
-
Transliteration System, taking into account the
|
17
|
-
practical amendments and corrections carried out and
|
18
|
-
agreed upon by the representatives of the Arabic-
|
19
|
-
speaking countries at the Fourth Arab Conference on
|
20
|
-
Geographical Names, held in Beirut in 2008, and some
|
21
|
-
clarifications and amendments agreed in Riyadh in 2017.
|
22
|
-
Previously, the United Nations had approved a
|
23
|
-
romanization system in 1972 (resolution II/8), based on the
|
24
|
-
system adopted by Arabic experts at the conference
|
25
|
-
held at Beirut in 1971 with the practical amendments carried out
|
26
|
-
and agreed upon by the representatives of the Arabic-speaking
|
27
|
-
countries at their conference. The table was published in volume
|
28
|
-
II of the conference report.
|
29
|
-
In UN resolution XI/3 it is specifically stated that the
|
30
|
-
system was recommended for the “romanization of the
|
31
|
-
geographical names within those Arabic-speaking countries
|
32
|
-
where this system is officially adopted”. There is
|
33
|
-
evidence of its partial implementation in Jordan, Oman and
|
34
|
-
Saudi Arabia. The UNGEGN Working Group on Romanization
|
35
|
-
Systems intends to continue monitoring the UN system’s
|
36
|
-
implementation across Arabic-speaking countries.
|
37
|
-
In some countries there exist local romanization schemes
|
38
|
-
or practices. The geographical names of Algeria, Djibouti,
|
39
|
-
Mauritania, Morocco and Tunisia are generally rendered in
|
40
|
-
the traditional manner which conforms to the principles of
|
41
|
-
the French orthography.
|
42
|
-
The previous UN-approved system is still found in
|
43
|
-
considerable international usage.
|
44
|
-
Arabic is written from right to left. The Arabic script
|
45
|
-
usually omits vowel points and diacritical marks from
|
46
|
-
writing which makes it difficult to obtain uniform results
|
47
|
-
in the romanization of Arabic. It is essential to identify
|
48
|
-
correctly the words which appear in any particular name
|
49
|
-
and to know the standard Arabic-script spelling including
|
50
|
-
the relevant vowels. One must also take into account
|
51
|
-
dialectal and idiosyncratic deviations. The romanization
|
52
|
-
is generally reversible though there may be some ambiguous
|
53
|
-
letter sequences (dh, kh, sh, th) which may also point to
|
54
|
-
combinations of Arabic characters in addition to the
|
55
|
-
respective single characters.
|
56
|
-
notes:
|
57
|
-
- |
|
58
|
-
The Survey of Egypt System (SES) of romanization has the following correspondences with
|
59
|
-
the UN system:
|
60
|
-
á = a # ـَى fatha followed by ى which is ا not ي
|
61
|
-
ā = â (a) # ـَا fatha followed by alef // آ
|
62
|
-
-ah (ة-) = a # ة ta' marboota at the end of a sentence
|
63
|
-
aw = ô (au) # ـَوْ
|
64
|
-
ay = ei (ai) # ـَيْ
|
65
|
-
ḏ = ḍ # ض
|
66
|
-
dh = dh (z) # ذ
|
67
|
-
d͟h = ẓ (d) # ظ
|
68
|
-
ẖ = ḥ # ح
|
69
|
-
ī = î
|
70
|
-
j = g (j)
|
71
|
-
q = q (k)
|
72
|
-
s = s (c)
|
73
|
-
s̱ = ṣ
|
74
|
-
ṯ = ṭ
|
75
|
-
th = th (t)
|
76
|
-
ū = û
|
77
|
-
‘ = ‛
|
78
|
-
- |
|
79
|
-
The variants in parentheses are used depending on pronunciation and tradition. Not all the
|
80
|
-
variations have been given above. The article is always written el- (El-Kafr el-Qadîm, Sharm
|
81
|
-
el-Sheikh).
|
82
|
-
tests:
|
83
|
-
|
84
|
-
# Examples taken from:
|
85
|
-
# https://unstats.un.org/unsd/geoinfo/geonames/
|
86
|
-
|
87
|
-
- source: شَرم الشَيْخ
|
88
|
-
expected: sharm el-sheikh
|
89
|
-
|
90
|
-
- source: الكَفر القَدِيم
|
91
|
-
expected: el-kafr el-qadîm
|
92
|
-
map:
|
93
|
-
inherit: "un-ara-Arab-Latn-2017"
|
94
|
-
characters:
|
95
|
-
|
96
|
-
|
97
|
-
# special pointed letters
|
98
|
-
'\u0639\u064e' : '‛a' # عَ
|
99
|
-
'\u0639\u0650' : '‛i' # عِ
|
100
|
-
'\u0639\u064f' : '‛û' # عُ
|
101
|
-
# handle MacOS regex difference
|
102
|
-
'\u0639\u064f\u0648' : '‛û' # عُو damma followed by و
|
103
|
-
'\u0650\u064a' : 'î' # ـِي kasra followed by ي
|
104
|
-
'\u0650\u064a\u0651\u064e' : 'îy' # ـِيَّ
|
105
|
-
'\u064f\u0648' : 'û' # ـُو damma followed by و
|
106
|
-
'\u064e\u0627' : # ـَا fatha followed by ا
|
107
|
-
- 'â'
|
108
|
-
- 'a'
|
109
|
-
'\u064e\u0649' : 'a' # ـَى fatha followed by ى which is ا not ي
|
110
|
-
'\u064e\u0648\u0652' : # ـَوْ
|
111
|
-
- 'ô'
|
112
|
-
- 'au'
|
113
|
-
'\u064e\u064a\u0652' : # ـَيْ
|
114
|
-
- 'ei'
|
115
|
-
- 'ai'
|
116
|
-
'\u0622' : # آ
|
117
|
-
- 'â'
|
118
|
-
- 'a'
|
119
|
-
|
120
|
-
# ta' marboota
|
121
|
-
'\u0629$' : 'a'
|
122
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'a'
|
123
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'a'
|
124
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'a'
|
125
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'a'
|
126
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'a'
|
127
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'a'
|
128
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'a'
|
129
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'a'
|
130
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'a'
|
131
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'a'
|
132
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'a'
|
133
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'a'
|
134
|
-
|
135
|
-
|
136
|
-
# Sun letters
|
137
|
-
'\b\u0627\u0644\u062a' : 'el-t' # الت
|
138
|
-
'\b\u0627\u0644\u062b' : # الث
|
139
|
-
- 'el-th'
|
140
|
-
- 'el-t'
|
141
|
-
'\b\u0627\u0644\u062f' : 'el-d' # الد
|
142
|
-
'\b\u0627\u0644\u0630' : # الذ
|
143
|
-
- 'el-dh'
|
144
|
-
- 'el-z'
|
145
|
-
'\b\u0627\u0644\u0631' : 'el-r' # الر
|
146
|
-
'\b\u0627\u0644\u0632' : 'el-z' # الز
|
147
|
-
'\b\u0627\u0644\u0633' : # الس
|
148
|
-
- 'el-s'
|
149
|
-
- 'el-c'
|
150
|
-
'\b\u0627\u0644\u0634' : 'el-sh' # الش
|
151
|
-
'\b\u0627\u0644\u0635' : 'el-ṣ' # الص
|
152
|
-
'\b\u0627\u0644\u0636' : 'el-ḍ' # الض
|
153
|
-
'\b\u0627\u0644\u0637' : 'el-ṭ' # الط
|
154
|
-
'\b\u0627\u0644\u0638' : # الظ
|
155
|
-
- 'el-ẓ'
|
156
|
-
- 'el-d'
|
157
|
-
'\b\u0627\u0644\u0644' : 'el-l' # الل
|
158
|
-
'\b\u0627\u0644\u0646' : 'el-n' # الن
|
159
|
-
|
160
|
-
|
161
|
-
# shadda
|
162
|
-
'\u062b\u0651' : # ث
|
163
|
-
- 'thth'
|
164
|
-
- 'tt'
|
165
|
-
'\u062c\u0651' : # ج
|
166
|
-
- 'gg'
|
167
|
-
- 'jj'
|
168
|
-
'\u062d\u0651' : 'ḥḥ' # ح
|
169
|
-
'\u062e\u0651' : 'khkh' # خ
|
170
|
-
|
171
|
-
'\u0633\u0651' : # س
|
172
|
-
- 'ss'
|
173
|
-
- 'cc'
|
174
|
-
'\u0635\u0651' : 'ṣṣ' # ص
|
175
|
-
'\u0636\u0651' : 'ḍḍ' # ض
|
176
|
-
'\u0637\u0651' : 'ṭṭ' # ط
|
177
|
-
'\u0638\u0651' : # ظ
|
178
|
-
- 'ẓẓ'
|
179
|
-
- 'dd'
|
180
|
-
'\u0642\u0651' : # ق
|
181
|
-
- 'qq'
|
182
|
-
- 'kk'
|
183
|
-
|
184
|
-
'\b\u0627\u0644' : 'el-' # ال
|
185
|
-
|
186
|
-
# normal letters
|
187
|
-
'\u062c' : # ج
|
188
|
-
- 'g'
|
189
|
-
- 'j'
|
190
|
-
'\ufe9f' : # ﺟ
|
191
|
-
- 'g'
|
192
|
-
- 'j'
|
193
|
-
'\ufea0' : # ﺠ
|
194
|
-
- 'g'
|
195
|
-
- 'j'
|
196
|
-
'\ufe9e' : # ﺞ
|
197
|
-
- 'g'
|
198
|
-
- 'j'
|
199
|
-
|
200
|
-
'\u062d' : 'ḥ' # ح
|
201
|
-
'\ufea3' : 'ḥ' # ﺣ
|
202
|
-
'\ufea4' : 'ḥ' # ﺤ
|
203
|
-
'\ufea2' : 'ḥ' # ﺢ
|
204
|
-
|
205
|
-
'\u062e' : 'kh' # خ
|
206
|
-
'\ufea7' : 'kh' # ﺧ
|
207
|
-
'\ufea8' : 'kh' # ﺨ
|
208
|
-
'\ufea6' : 'kh' # ﺦ
|
209
|
-
|
210
|
-
'\u0630' : # ذ
|
211
|
-
- 'dh'
|
212
|
-
- 'z'
|
213
|
-
'\ufeac' : # ﺬ
|
214
|
-
- 'dh'
|
215
|
-
- 'z'
|
216
|
-
|
217
|
-
|
218
|
-
'\u0633' : # س
|
219
|
-
- 's'
|
220
|
-
- 'c'
|
221
|
-
'\ufeb3' : # ﺳ
|
222
|
-
- 's'
|
223
|
-
- 'c'
|
224
|
-
'\ufeb4' : # ﺴ
|
225
|
-
- 's'
|
226
|
-
- 'c'
|
227
|
-
'\ufeb2' : # ﺲ
|
228
|
-
- 's'
|
229
|
-
- 'c'
|
230
|
-
|
231
|
-
'\u0635' : 'ṣ' # ص
|
232
|
-
'\ufebb' : 'ṣ' # ﺻ
|
233
|
-
'\ufebc' : 'ṣ' # ﺼ
|
234
|
-
'\ufeba' : 'ṣ' # ﺺ
|
235
|
-
|
236
|
-
'\u0636' : 'ḍ' # ض
|
237
|
-
'\ufebf' : 'ḍ' # ﺿ
|
238
|
-
'\ufec0' : 'ḍ' # ﻀ
|
239
|
-
'\ufebe' : 'ḍ' # ﺾ
|
240
|
-
|
241
|
-
'\u0637' : 'ṭ' # ط
|
242
|
-
'\ufec3' : 'ṭ' # ﻃ
|
243
|
-
'\ufec4' : 'ṭ' # ﻄ
|
244
|
-
'\ufec2' : 'ṭ' # ﻂ
|
245
|
-
|
246
|
-
'\u0639' : '‛' # ع
|
247
|
-
'\ufecb' : '‛' # ﻋ
|
248
|
-
'\ufecc' : '‛' # ﻌ
|
249
|
-
'\ufeca' : '‛' # ﻊ
|
250
|
-
|
251
|
-
'\u0638' : # ظ
|
252
|
-
- 'ẓ'
|
253
|
-
- 'd'
|
254
|
-
'\ufec7' : # ﻇ
|
255
|
-
- 'ẓ'
|
256
|
-
- 'd'
|
257
|
-
'\ufec8' : # ﻈ
|
258
|
-
- 'ẓ'
|
259
|
-
- 'd'
|
260
|
-
'\ufec6' : # ﻆ
|
261
|
-
- 'ẓ'
|
262
|
-
- 'd'
|
263
|
-
|
264
|
-
'\u0642' : # ق
|
265
|
-
- 'q'
|
266
|
-
- 'k'
|
267
|
-
'\ufed7' : # ﻗ
|
268
|
-
- 'q'
|
269
|
-
- 'k'
|
270
|
-
'\ufed8' : # ﻘ
|
271
|
-
- 'q'
|
272
|
-
- 'k'
|
273
|
-
'\ufed6' : # ﻖ
|
274
|
-
- 'q'
|
275
|
-
- 'k'
|
@@ -1,222 +0,0 @@
|
|
1
|
-
---
|
2
|
-
authority_id: stategeocadastre
|
3
|
-
id: 1993
|
4
|
-
language: ukr
|
5
|
-
source_script: Cyrl
|
6
|
-
destination_script: Latn
|
7
|
-
name: PROVISIONAL RULES OF REPRODUCING LETTERS OF THE UKRAINIAN ALPHABET WITH LATIN (ENGLISH) CHARACTERS
|
8
|
-
url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/17th-gegn-docs/17th_gegn_WP73.pdf
|
9
|
-
creation_date: 1993
|
10
|
-
description: |
|
11
|
-
These Rules are intended for Romanized transliteration of Ukrainian
|
12
|
-
geographic names in international cartographic editions.
|
13
|
-
|
14
|
-
Geographic names of Russia, Byelorussia, Bulgaria and other states
|
15
|
-
using the Cyrillic alphabet are transliterated according to rules
|
16
|
-
accepted in those states.
|
17
|
-
|
18
|
-
These Rules come into effect from the moment of their approval by
|
19
|
-
the Main Administration of Geodesy, Cartography and Cadastre and
|
20
|
-
will be effective until the introduction of a State standard of
|
21
|
-
Ukraine regulating the Romanized transliteration of the Ukrainian
|
22
|
-
alphabet.
|
23
|
-
|
24
|
-
notes:
|
25
|
-
- No apostrophe (’) is used in transliteration, the combination "ьо" is transliterated as "io"
|
26
|
-
- Use of capitals in Latin version of Ukrainian geographic names corresponds to the Ukrainian spelling
|
27
|
-
- Generic geographical terms standing before or after a name in full or abbreviated form are transliterated
|
28
|
-
- Romanized versions of complex and compound Ukrainian toponyms (one word, hyphenated or separate words) will follow the Ukrainian spelling
|
29
|
-
- In indexes of Romanized geographical names entries must be arranged in the order of the Latin (English) alphabet
|
30
|
-
- Geographic names of Russia, Byelorussia, Bulgaria and other states using the Cyrillic alphabet are transliterated according to rules accepted in those states.
|
31
|
-
- "Ed: There seems to be a mistake in the source document. 'ц' should be replaced with ts instead 'tz'."
|
32
|
-
|
33
|
-
tests:
|
34
|
-
- source: Кам’янка # note[1]
|
35
|
-
expected: Kamianka
|
36
|
-
- source: Сьомаки # note[1]
|
37
|
-
expected: Siomaky
|
38
|
-
- source: Усть-Чорна # note[2]
|
39
|
-
expected: Ust’-Chorna
|
40
|
-
- source: Чорне море # note[2]
|
41
|
-
expected: Chorne more
|
42
|
-
- source: оз. Сиваш # note[3]
|
43
|
-
expected: oz. Syvash
|
44
|
-
- source: Кримський канал # note[3]
|
45
|
-
expected: Kryms’kyi kanal # ! Example had typo in original document "Krums’kyi kanal"
|
46
|
-
- source: Гола Пристань
|
47
|
-
expected: Hola Prystan’
|
48
|
-
- source: Корсунь Шевченківський
|
49
|
-
expected: Korsun’ Shevchenkivs’kyi
|
50
|
-
- source: Верхньодніпровськ
|
51
|
-
expected: Verkhniodniprovs’k
|
52
|
-
- source: Варва
|
53
|
-
expected: Varva
|
54
|
-
- source: Броди
|
55
|
-
expected: Brody
|
56
|
-
- source: Верховина
|
57
|
-
expected: Verkhovyna
|
58
|
-
- source: Глухів
|
59
|
-
expected: Hlukhiv
|
60
|
-
- source: Великий
|
61
|
-
expected: Velykyi
|
62
|
-
- source: Ґрунь(гора)
|
63
|
-
expected: Grun’(hora)
|
64
|
-
- source: Димер
|
65
|
-
expected: Dymer
|
66
|
-
- source: Срібне
|
67
|
-
expected: Sribne
|
68
|
-
- source: Євпаторія
|
69
|
-
expected: Yevpatoriia
|
70
|
-
- source: Єнакієве
|
71
|
-
expected: Yenakiieve
|
72
|
-
- source: Жолква
|
73
|
-
expected: Zholkva
|
74
|
-
- source: Затока
|
75
|
-
expected: Zatoka
|
76
|
-
- source: Житомир
|
77
|
-
expected: Zhytomyr
|
78
|
-
- source: Інгул
|
79
|
-
expected: Inhul
|
80
|
-
- source: Зміїв
|
81
|
-
expected: Zmiïv
|
82
|
-
- source: Йосипівка
|
83
|
-
expected: Yosypivka
|
84
|
-
- source: Стрий
|
85
|
-
expected: Stryi
|
86
|
-
- source: Калуш
|
87
|
-
expected: Kalush
|
88
|
-
- source: Лубни
|
89
|
-
expected: Lubny
|
90
|
-
- source: Миколаїв
|
91
|
-
expected: Mykolaïv
|
92
|
-
- source: Ніжин
|
93
|
-
expected: Nizhyn
|
94
|
-
- source: Острог
|
95
|
-
expected: Ostroh
|
96
|
-
- source: Печеніги
|
97
|
-
expected: Pechenihy
|
98
|
-
- source: Рівне
|
99
|
-
expected: Rivne
|
100
|
-
- source: Сарата
|
101
|
-
expected: Sarata
|
102
|
-
- source: Тячів
|
103
|
-
expected: Tiachiv
|
104
|
-
- source: Узин
|
105
|
-
expected: Uzyn
|
106
|
-
- source: Форос
|
107
|
-
expected: Foros
|
108
|
-
- source: Харків
|
109
|
-
expected: Kharkiv
|
110
|
-
- source: Цюрупінськ
|
111
|
-
expected: Tsiurupins’k
|
112
|
-
- source: Черемош
|
113
|
-
expected: Cheremosh
|
114
|
-
- source: Шацьк
|
115
|
-
expected: Shats’k
|
116
|
-
- source: Щорс
|
117
|
-
expected: Shchors
|
118
|
-
- source: Хмельницький
|
119
|
-
expected: Khmel’nyts’kyi # ! Example had typo in original document "Khmel’nyts’ky"
|
120
|
-
- source: Юрівка
|
121
|
-
expected: Yurivka
|
122
|
-
- source: Любеч
|
123
|
-
expected: Liubech
|
124
|
-
- source: Ялта
|
125
|
-
expected: Yalta
|
126
|
-
- source: Ясіня
|
127
|
-
expected: Yasinia
|
128
|
-
|
129
|
-
|
130
|
-
map:
|
131
|
-
rules:
|
132
|
-
- pattern: (?<!\b\u2019)\b\u0404 # Є in initial position -> Ye
|
133
|
-
result: Ye
|
134
|
-
- pattern: (?<!\b\u2019)\b\u0454 # є in initial position -> ye
|
135
|
-
result: ye
|
136
|
-
- pattern: (?<!\b\u2019)\b\u0419 # Й in initial position -> Y
|
137
|
-
result: "Y"
|
138
|
-
- pattern: (?<!\b\u2019)\b\u0439 # й in initial position -> y
|
139
|
-
result: "y"
|
140
|
-
- pattern: (?<!\b\u2019)\b\u042e # Ю in initial position -> Yu
|
141
|
-
result: Yu
|
142
|
-
- pattern: (?<!\b\u2019)\b\u044e # ю in initial position -> yu
|
143
|
-
result: yu
|
144
|
-
- pattern: (?<!\b\u2019)\b\u042f # Я in initial position -> Ya
|
145
|
-
result: Ya
|
146
|
-
- pattern: (?<!\b\u2019)\b\u044f # я in initial position -> ya
|
147
|
-
result: ya
|
148
|
-
# note[1]
|
149
|
-
- pattern: \b\u2019\b # remove ’
|
150
|
-
result: ""
|
151
|
-
- pattern: \u042c\u041e
|
152
|
-
result: "IO"
|
153
|
-
- pattern: \u044c\u043e
|
154
|
-
result: "io"
|
155
|
-
|
156
|
-
characters:
|
157
|
-
"\u0410": "A" # А
|
158
|
-
"\u0411": "B" # Б
|
159
|
-
"\u0412": "V" # В
|
160
|
-
"\u0413": "H" # Г
|
161
|
-
"\u0490": "G" # Ґ
|
162
|
-
"\u0414": "D" # Д
|
163
|
-
"\u0415": "E" # Е
|
164
|
-
"\u0404": "Ie" # Є
|
165
|
-
"\u0416": "Zh" # Ж
|
166
|
-
"\u0417": "Z" # З
|
167
|
-
"\u0418": "Y" # И
|
168
|
-
"\u0406": "I" # І
|
169
|
-
"\u0407": "I\u0308" # Ї
|
170
|
-
"\u0419": "I" # Й
|
171
|
-
"\u041a": "K" # К
|
172
|
-
"\u041b": "L" # Л
|
173
|
-
"\u041c": "M" # М
|
174
|
-
"\u041d": "N" # Н
|
175
|
-
"\u041e": "O" # О
|
176
|
-
"\u041f": "P" # П
|
177
|
-
"\u0420": "R" # Р
|
178
|
-
"\u0421": "S" # С
|
179
|
-
"\u0422": "T" # Т
|
180
|
-
"\u0423": "U" # У
|
181
|
-
"\u0424": "F" # Ф
|
182
|
-
"\u0425": "Kh" # Х
|
183
|
-
"\u0426": "Ts" # Ц note[7]
|
184
|
-
"\u0427": "Ch" # Ч
|
185
|
-
"\u0428": "Sh" # Ш
|
186
|
-
"\u0429": "Shch" # Щ
|
187
|
-
"\u042c": "\u2019" # Ь
|
188
|
-
"\u042e": "Iu" # Ю
|
189
|
-
"\u042f": "Ia" # Я
|
190
|
-
"\u0430": "a" # а
|
191
|
-
"\u0431": "b" # б
|
192
|
-
"\u0432": "v" # в
|
193
|
-
"\u0433": "h" # г
|
194
|
-
"\u0491": "g" # ґ
|
195
|
-
"\u0434": "d" # д
|
196
|
-
"\u0435": "e" # е
|
197
|
-
"\u0454": "ie" # є
|
198
|
-
"\u0436": "zh" # ж
|
199
|
-
"\u0437": "z" # з
|
200
|
-
"\u0438": "y" # и
|
201
|
-
"\u0456": "i" # і
|
202
|
-
"\u0457": "i" # ї
|
203
|
-
"\u0439": "i" # й
|
204
|
-
"\u043a": "k" # к
|
205
|
-
"\u043b": "l" # л
|
206
|
-
"\u043c": "m" # м
|
207
|
-
"\u043d": "n" # н
|
208
|
-
"\u043e": "o" # о
|
209
|
-
"\u043f": "p" # п
|
210
|
-
"\u0440": "r" # р
|
211
|
-
"\u0441": "s" # с
|
212
|
-
"\u0442": "t" # т
|
213
|
-
"\u0443": "u" # у
|
214
|
-
"\u0444": "f" # ф
|
215
|
-
"\u0445": "kh" # х
|
216
|
-
"\u0446": "ts" # ц note[7]
|
217
|
-
"\u0447": "ch" # ч
|
218
|
-
"\u0448": "sh" # ш
|
219
|
-
"\u0449": "shch" # щ
|
220
|
-
"\u044e": "iu" # ю
|
221
|
-
"\u044f": "ia" # я
|
222
|
-
"\u044c": "\u2019" # ь
|