interscript 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
- data/lib/interscript.rb +5 -1
- data/lib/interscript/fs.rb +3 -1
- data/lib/interscript/mapping.rb +2 -2
- data/lib/interscript/opal.rb +5 -1
- data/lib/interscript/opal/maps.js.erb +7 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +1 -1
- data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
- data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
- data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
- data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +1 -1
- data/maps/{alalc-bel-cyrl-latn-1997.yaml → alalc-bel-Cyrl-Latn-1997.yaml} +2 -2
- data/maps/alalc-ell-Grek-Latn-1997.yaml +2 -3
- data/maps/alalc-ell-Grek-Latn-2010.yaml +2 -3
- data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -2
- data/maps/alalc-kor-Hang-Latn-1997.yaml +1 -1
- data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
- data/maps/{alalc-mkd-cyrl-latn-1997.yaml → alalc-mkd-Cyrl-Latn-1997.yaml} +0 -0
- data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +1 -2
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +2 -2
- data/maps/{alalc-srp-cyrl-latn-2013.yaml → alalc-srp-Cyrl-Latn-2013.yaml} +0 -0
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -1
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -2
- data/maps/bgn-kor-Hang-Latn-1943.yaml +1 -1
- data/maps/bgn-kor-Kore-Latn-1943.yaml +1 -1
- data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +5 -5
- data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} +0 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +3 -4
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -1
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -1
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +17 -17
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +2 -2
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +2 -2
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
- data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
- data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -1
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -1
- data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
- data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
- data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
- data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
- data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
- data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
- data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
- data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
- data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +4 -4
- data/maps/by-bel-Cyrl-Latn-2007.yaml +3 -3
- data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +4 -5
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +4 -5
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -1
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -1
- data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -1
- data/maps/{gki-bel-cyrl-latn-1992.yaml → gki-bel-Cyrl-Latn-1992.yaml} +1 -1
- data/maps/{gki-bel-cyrl-latn-2000.yaml → gki-bel-Cyrl-Latn-2000.yaml} +1 -1
- data/maps/{gost-rus-cyrl-latn-16876-71-1983.yaml → gost-rus-Cyrl-Latn-16876-71-1983.yaml} +1 -1
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -5
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -4
- data/maps/icao-per-Arab-Latn-9303.yaml +0 -1
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -1
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -1
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +4 -5
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +1 -2
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -1
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
- data/maps/kp-kor-Hang-Latn-2002.yaml +4 -4
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +2 -2
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +4 -4
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +4 -4
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +1 -2
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +4 -4
- data/maps/nil-kor-Hang-Hang-jamo.yaml +3 -3
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +3 -3
- data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +0 -1
- data/maps/{odni-mkd-cyrl-latn-2015.yaml → odni-mkd-Cyrl-Latn-2015.yaml} +0 -0
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +2 -2
- data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -2
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1968.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1999.yaml +3 -3
- data/maps/{ses-ara-arab-latn-1930.yaml → ses-ara-Arab-Latn-1930.yaml} +7 -3
- data/maps/un-ara-Arab-Latn-1971.yaml +16 -4
- data/maps/un-ara-Arab-Latn-1972.yaml +14 -7
- data/maps/un-ara-Arab-Latn-2017.yaml +56 -19
- data/maps/un-bel-Cyrl-Latn-2007.yaml +3 -3
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +1 -2
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -1
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +42 -42
- data/maps/un-mon-Mong-Latn-2013.yaml +9 -3
- data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
- data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
- data/maps/{un-ukr-cyrl-latn-1998.yaml → un-ukr-Cyrl-Latn-1998.yaml} +1 -1
- data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
- data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -2
- data/maps/var-tha-Thai-Thai-phonemic.yaml +5 -5
- data/maps/var-tha-Thai-Zsym-ipa.yaml +12 -12
- data/maps/var-zho-Hani-Latn-1979.yaml +7 -7
- metadata +41 -15
@@ -94,25 +94,33 @@ tests:
|
|
94
94
|
# https://unstats.un.org/unsd/geoinfo/geonames/
|
95
95
|
|
96
96
|
- source: مِصر
|
97
|
-
expected:
|
97
|
+
expected: Mişr
|
98
98
|
|
99
99
|
- source: قَطَر
|
100
|
-
expected:
|
100
|
+
expected: Qaţar
|
101
101
|
|
102
102
|
- source: الجُمهُورِيَّة العِراقِيَّة
|
103
|
-
expected:
|
103
|
+
expected: Al Jumhūrīyah al ‘Irāqīyah
|
104
104
|
|
105
105
|
- source: جُمهُورِيَّة مِصر العَرَبِيَّة
|
106
|
-
expected:
|
106
|
+
expected: Jumhūrīyat Mişr al ‘Arabīyah
|
107
107
|
|
108
108
|
- source: الرِيَاض
|
109
|
-
expected:
|
109
|
+
expected: Ar Riyāḑ
|
110
110
|
|
111
111
|
- source: الشارِقة
|
112
|
-
expected:
|
112
|
+
expected: Ash Shāriqah
|
113
113
|
|
114
114
|
map:
|
115
115
|
inherit: "un-ara-Arab-Latn-2017"
|
116
|
+
postrules:
|
117
|
+
- pattern : ' Aş Ş' # الص
|
118
|
+
result: ' aş Ş'
|
119
|
+
- pattern : ' Aḑ Ḑ' # الض
|
120
|
+
result: ' aḑ Ḑ'
|
121
|
+
- pattern : ' Aţ Ţ' # الط
|
122
|
+
result: ' aţ Ţ'
|
123
|
+
|
116
124
|
characters:
|
117
125
|
|
118
126
|
'\b\u0627\u0644\u0635' : 'aş ş' # الص
|
@@ -149,4 +157,3 @@ map:
|
|
149
157
|
'\ufec7' : 'z̧' # ﻇ
|
150
158
|
'\ufec8' : 'z̧' # ﻈ
|
151
159
|
'\ufec6' : 'z̧' # ﻆ
|
152
|
-
|
@@ -71,6 +71,9 @@ notes:
|
|
71
71
|
middle dot (·) may be used: سهيلة S·haylah (cf. شيلة Shaylah), دهيب
|
72
72
|
D·hayb (cf. ذيب Dhayb), أدهم Ad·ham (cf. أذم Adham).
|
73
73
|
- |
|
74
|
+
ta' marboota should be transliterated to 'ah' if it's in
|
75
|
+
a word carrying the definite article, or at the end of the sentence;
|
76
|
+
otherwise it should be transliterated to 'at'
|
74
77
|
to handle words starting with AL and ending with ta' marboota
|
75
78
|
which is pronounced as "ah" not "at" divided into multiple
|
76
79
|
regex because lookbehind in ruby doesn't support variable length
|
@@ -84,57 +87,92 @@ tests:
|
|
84
87
|
# https://unstats.un.org/unsd/geoinfo/geonames/
|
85
88
|
|
86
89
|
- source: مِصر
|
87
|
-
expected:
|
90
|
+
expected: Mis̱r
|
88
91
|
|
89
92
|
- source: قَطَر
|
90
|
-
expected:
|
93
|
+
expected: Qaṯar
|
91
94
|
|
92
95
|
- source: المَغرِب
|
93
|
-
expected:
|
96
|
+
expected: Al Maghrib
|
94
97
|
|
95
98
|
- source: الجُمهُورِيَّة العِراقِيَّة
|
96
|
-
expected:
|
99
|
+
expected: Al Jumhūrīyah al ‘Irāqīyah
|
97
100
|
|
98
101
|
- source: جُمهُورِيَّة العِراق
|
99
|
-
expected:
|
102
|
+
expected: Jumhūrīyat al ‘Irāq
|
100
103
|
|
101
104
|
- source: جُمهُورِيَّة مِصر العَرَبِيَّة
|
102
|
-
expected:
|
105
|
+
expected: Jumhūrīyat Mis̱r al ‘Arabīyah
|
103
106
|
|
104
107
|
- source: بَغداد
|
105
|
-
expected:
|
108
|
+
expected: Baghdād
|
106
109
|
|
107
110
|
- source: تُونِس
|
108
|
-
expected:
|
111
|
+
expected: Tūnis
|
109
112
|
|
110
113
|
- source: السُعُودِيَّة
|
111
|
-
expected:
|
114
|
+
expected: As Su‘ūdīyah
|
112
115
|
|
113
116
|
- source: اليَمَن
|
114
|
-
expected:
|
117
|
+
expected: Al Yaman
|
115
118
|
|
116
119
|
- source: السُودان
|
117
|
-
expected:
|
120
|
+
expected: As Sūdān
|
118
121
|
|
119
122
|
- source: الجَزائِر
|
120
|
-
expected:
|
123
|
+
expected: Al Jazā'ir
|
121
124
|
|
122
125
|
- source: الجُمهُورِيَّة اللُبنانِيَّة
|
123
|
-
expected:
|
126
|
+
expected: Al Jumhūrīyah al Lubnānīyah
|
124
127
|
|
125
128
|
- source: أسمَرة
|
126
|
-
expected:
|
129
|
+
expected: Asmarah
|
127
130
|
|
128
131
|
- source: جِدَّة
|
129
|
-
expected:
|
132
|
+
expected: Jiddah
|
130
133
|
|
131
134
|
- source: مَكَّة
|
132
|
-
expected:
|
135
|
+
expected: Makkah
|
133
136
|
|
134
137
|
- source: الرِيَاض
|
135
|
-
expected:
|
138
|
+
expected: Ar Riyāḏ
|
136
139
|
|
137
140
|
map:
|
141
|
+
postrules:
|
142
|
+
- pattern: (?<=\b)(?<!\b[‘|’|'])[\u0061-\uFFFF]
|
143
|
+
result: "upcase"
|
144
|
+
# don't capitalize the definite article in the middle of a sentence
|
145
|
+
- pattern : ' At T' # الت
|
146
|
+
result: ' at T'
|
147
|
+
- pattern : ' Ath Th' # الث
|
148
|
+
result: ' ath th'
|
149
|
+
- pattern : ' Ad D' # الد
|
150
|
+
result: ' ad D'
|
151
|
+
- pattern : ' Adh Dh' # الذ
|
152
|
+
result: ' adh Dh'
|
153
|
+
- pattern : ' Ar R' # الر
|
154
|
+
result: ' ar R'
|
155
|
+
- pattern : ' Az Z' # الز
|
156
|
+
result: ' az Z'
|
157
|
+
- pattern : ' As S' # الس
|
158
|
+
result: ' as S'
|
159
|
+
- pattern : ' Ash Sh' # الش
|
160
|
+
result: ' ash Sh'
|
161
|
+
- pattern : ' As̱ S̱' # الص
|
162
|
+
result: ' as̱ S̱'
|
163
|
+
- pattern : ' Aḏ Ḏ' # الض
|
164
|
+
result: ' aḏ Ḏ'
|
165
|
+
- pattern : ' Aṯ Ṯ' # الط
|
166
|
+
result: ' aṯ Ṯ'
|
167
|
+
- pattern : ' Ad͟h D͟h' # الظ
|
168
|
+
result: ' ad͟h D͟h'
|
169
|
+
- pattern : ' Al L' # الل
|
170
|
+
result: ' al L'
|
171
|
+
- pattern : ' an n' # الن
|
172
|
+
result: ' an N'
|
173
|
+
- pattern: " Al " # ال
|
174
|
+
result: " al "
|
175
|
+
|
138
176
|
characters:
|
139
177
|
|
140
178
|
# Tool used for Unicode finding:
|
@@ -143,6 +181,7 @@ map:
|
|
143
181
|
# pointing
|
144
182
|
'\u064e' : 'a' # َ fatha
|
145
183
|
'\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
|
184
|
+
'\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
|
146
185
|
'\u0650' : 'i' # ِ kasra
|
147
186
|
'\u064f' : 'u' # ُ damma
|
148
187
|
'\u0652' : '' # ْ sokoon, see note A below
|
@@ -379,5 +418,3 @@ map:
|
|
379
418
|
|
380
419
|
# Vowels, diphthongs and diacritical marks
|
381
420
|
# (ـ stands for any consonant)
|
382
|
-
|
383
|
-
|
@@ -8,11 +8,11 @@ name: National System of Geographic Names Transmission into Roman Alphabet in Be
|
|
8
8
|
url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/9th-uncsgn-docs/crp/9th_UNCSGN_e-conf-98-crp-21.pdf
|
9
9
|
creation_date: 2007
|
10
10
|
description: |
|
11
|
-
RESOLUTION OF THE STATE COMMITTEE
|
11
|
+
RESOLUTION OF THE STATE COMMITTEE
|
12
12
|
ON PROPERTY OF THE REPUBLIC OF BELARUS June 11, 2007 No. 38
|
13
13
|
|
14
|
-
8/16668 (06/18/2007) On amendments and additions to the Instructions
|
15
|
-
for the transliteration of geographical names of the
|
14
|
+
8/16668 (06/18/2007) On amendments and additions to the Instructions
|
15
|
+
for the transliteration of geographical names of the
|
16
16
|
Republic of Belarus in letters of the Latin alphabet
|
17
17
|
|
18
18
|
Based on the Regulation on the State Property Committee of the Republic of Belarus,
|
@@ -22,11 +22,10 @@ tests:
|
|
22
22
|
|
23
23
|
expected: |
|
24
24
|
Éna práma mónon me parakíni̱se ki eména na grápso̱ óti toúti̱n ti̱n patrída ti̱n échomen óloi mazí, kai sofoí ki amatheís kai ploúsioi kai fto̱choí kai politikoí kai stratio̱tikoí kai oi pléon mikróteroi ánthro̱poi; ósoi ago̱nistí̱kamen, analógo̱s o katheís, échomen na zí̱somen edó̱. To loipón doulépsamen óloi mazí, na ti̱n fylámen ki óloi mazí kai na mi̱n légei oúte o dynatós «egó̱» oúte o adýnatos. Xérete póte na légei o katheís «egó̱»? Ótan ago̱nisteí mónos tou kai fkiásei í̱ chalásei, na légei «egó̱»; ótan ómo̱s ago̱nízontai polloí kai fkiánoun, tóte na léne «emeís». Eímaste eis to «emeís» ki óchi eis to «egó̱». Kai eis to exí̱s na máthomen gnó̱si̱, an thélomen na fkiásomen cho̱rión, na zí̱somen óloi mazí.
|
25
|
-
|
25
|
+
|
26
26
|
Giánni̱s Makrygiánni̱s.
|
27
27
|
|
28
28
|
map:
|
29
29
|
character_separator: ""
|
30
30
|
word_separator: " "
|
31
31
|
inherit: "elot-ell-Grek-Latn-743-1982-tl"
|
32
|
-
|
@@ -11,7 +11,7 @@ description: |
|
|
11
11
|
UNGEGN Romanization table for Greek: Phonetic transcription
|
12
12
|
|
13
13
|
|
14
|
-
note:
|
14
|
+
note:
|
15
15
|
- Also included in ISO 843:1997, Annex B, Column 5, and ELOT 743:1982, column 5.
|
16
16
|
- Corrected obvious errors, which occur every time the table has reappeared: χ > x, x > ks, oï > oi.
|
17
17
|
- The vowels are taken from the specification, but some are controversial: /ɑ ɛ/ but /o/.
|
@@ -23,37 +23,37 @@ note:
|
|
23
23
|
tests:
|
24
24
|
|
25
25
|
- source: |
|
26
|
-
Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί,
|
27
|
-
|
28
|
-
και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι·
|
29
|
-
|
30
|
-
όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ.
|
31
|
-
|
32
|
-
Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος.
|
33
|
-
|
34
|
-
Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»·
|
35
|
-
|
36
|
-
όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ».
|
37
|
-
|
26
|
+
Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί,
|
27
|
+
|
28
|
+
και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι·
|
29
|
+
|
30
|
+
όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ.
|
31
|
+
|
32
|
+
Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος.
|
33
|
+
|
34
|
+
Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»·
|
35
|
+
|
36
|
+
όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ».
|
37
|
+
|
38
38
|
Και εις το εξής να μάθομεν γνώση, αν θέλομεν να φκιάσομεν χωριόν, να ζήσομεν όλοι μαζί.
|
39
39
|
|
40
40
|
Γιάννης Μακρυγιάννης.
|
41
41
|
|
42
42
|
expected: |
|
43
|
-
ɛnɑ prɑmɑ monon mɛ pɑrɑkinisɛ ki ɛmɛnɑ nɑ ɣrɑpso oti tutin tin pɑtriðɑ tin ɛxomɛn oli mɑzi,
|
44
|
-
|
45
|
-
kɛ sofi ki ɑmɑθis kɛ plusii kɛ ftoxi kɛ politiki kɛ strɑtiotiki kɛ i plɛon mikrotɛri ɑnθropi;
|
46
|
-
|
47
|
-
osi ɑɣonistikɑmɛn, ɑnɑloɣos o kɑθis, ɛxomɛn nɑ zisomɛn ɛðo.
|
48
|
-
|
49
|
-
to lipon ðulɛpsɑmɛn oli mɑzi, nɑ tin filɑmɛn ki oli mɑzi kɛ nɑ min lɛɣi utɛ o ðinɑtos «ɛɣo» utɛ o ɑðinɑtos.
|
50
|
-
|
51
|
-
ksɛrɛtɛ potɛ nɑ lɛɣi o kɑθis «ɛɣo»? otɑn ɑɣonisti monos tu kɛ fkiɑsi i xɑlɑsi, nɑ lɛɣi «ɛɣo»;
|
52
|
-
|
53
|
-
otɑn omos ɑɣonizondɛ poli kɛ fkiɑnun, totɛ nɑ lɛnɛ «ɛmis». imɑstɛ is to «ɛmis» ki oxi is to «ɛɣo».
|
54
|
-
|
43
|
+
ɛnɑ prɑmɑ monon mɛ pɑrɑkinisɛ ki ɛmɛnɑ nɑ ɣrɑpso oti tutin tin pɑtriðɑ tin ɛxomɛn oli mɑzi,
|
44
|
+
|
45
|
+
kɛ sofi ki ɑmɑθis kɛ plusii kɛ ftoxi kɛ politiki kɛ strɑtiotiki kɛ i plɛon mikrotɛri ɑnθropi;
|
46
|
+
|
47
|
+
osi ɑɣonistikɑmɛn, ɑnɑloɣos o kɑθis, ɛxomɛn nɑ zisomɛn ɛðo.
|
48
|
+
|
49
|
+
to lipon ðulɛpsɑmɛn oli mɑzi, nɑ tin filɑmɛn ki oli mɑzi kɛ nɑ min lɛɣi utɛ o ðinɑtos «ɛɣo» utɛ o ɑðinɑtos.
|
50
|
+
|
51
|
+
ksɛrɛtɛ potɛ nɑ lɛɣi o kɑθis «ɛɣo»? otɑn ɑɣonisti monos tu kɛ fkiɑsi i xɑlɑsi, nɑ lɛɣi «ɛɣo»;
|
52
|
+
|
53
|
+
otɑn omos ɑɣonizondɛ poli kɛ fkiɑnun, totɛ nɑ lɛnɛ «ɛmis». imɑstɛ is to «ɛmis» ki oxi is to «ɛɣo».
|
54
|
+
|
55
55
|
kɛ is to ɛksis nɑ mɑθomɛn ɣnosi, ɑn θɛlomɛn nɑ fkiɑsomɛn xorion, nɑ zisomɛn oli mɑzi.
|
56
|
-
|
56
|
+
|
57
57
|
ɣiɑnis mɑkriɣiɑnis.
|
58
58
|
|
59
59
|
|
@@ -101,7 +101,7 @@ tests:
|
|
101
101
|
expected: tɑiɣɛtos
|
102
102
|
- source: σπρέυ
|
103
103
|
expected: sprɛi
|
104
|
-
|
104
|
+
|
105
105
|
- source: Αθήνα
|
106
106
|
expected: ɑθinɑ
|
107
107
|
- source: Άγιον Όρος
|
@@ -748,33 +748,33 @@ map:
|
|
748
748
|
|
749
749
|
"\u0387": ";" # ·
|
750
750
|
"\u00B7": ";" # ·
|
751
|
-
|
751
|
+
|
752
752
|
postrules:
|
753
753
|
- pattern: vv
|
754
|
-
result: "v"
|
754
|
+
result: "v"
|
755
755
|
- pattern: ðð
|
756
|
-
result: "ð"
|
756
|
+
result: "ð"
|
757
757
|
- pattern: zz
|
758
|
-
result: "z"
|
758
|
+
result: "z"
|
759
759
|
- pattern: θθ
|
760
|
-
result: "θ"
|
760
|
+
result: "θ"
|
761
761
|
- pattern: kk
|
762
|
-
result: "k"
|
762
|
+
result: "k"
|
763
763
|
- pattern: ll
|
764
|
-
result: "l"
|
764
|
+
result: "l"
|
765
765
|
- pattern: mm
|
766
|
-
result: "m"
|
766
|
+
result: "m"
|
767
767
|
- pattern: nn
|
768
|
-
result: "n"
|
768
|
+
result: "n"
|
769
769
|
- pattern: pp
|
770
|
-
result: "p"
|
770
|
+
result: "p"
|
771
771
|
- pattern: rr
|
772
|
-
result: "r"
|
772
|
+
result: "r"
|
773
773
|
- pattern: ss
|
774
|
-
result: "s"
|
774
|
+
result: "s"
|
775
775
|
- pattern: tt
|
776
|
-
result: "t"
|
776
|
+
result: "t"
|
777
777
|
- pattern: ff
|
778
|
-
result: "f"
|
778
|
+
result: "f"
|
779
779
|
- pattern: xx
|
780
|
-
result: "x"
|
780
|
+
result: "x"
|
@@ -43,6 +43,12 @@ tests:
|
|
43
43
|
- source: "ᠬᠥᠬᠡᠬᠣᠲᠠ"
|
44
44
|
expected: "kökeqota"
|
45
45
|
map:
|
46
|
+
rules:
|
47
|
+
- pattern: \u182c(\u1821|\u1825|\u1826)
|
48
|
+
result: "k\\1"
|
49
|
+
- pattern: \u182d(\u1821|\u1825|\u1826)
|
50
|
+
result: "g\\1"
|
51
|
+
|
46
52
|
characters:
|
47
53
|
"ᠠ": "a"
|
48
54
|
"ᠪ": "b"
|
@@ -51,9 +57,9 @@ map:
|
|
51
57
|
"ᠳ": "d"
|
52
58
|
"ᠡ": "e"
|
53
59
|
"ᠹ": "f"
|
54
|
-
"ᠭ": "g"
|
60
|
+
"ᠭ": "ġ"
|
55
61
|
"ᠺ": "g"
|
56
|
-
"ᠬ": "
|
62
|
+
"ᠬ": "q"
|
57
63
|
"ᠾ": "h"
|
58
64
|
"ᠢ": "i"
|
59
65
|
"ᠵ": "j"
|
@@ -61,7 +67,7 @@ map:
|
|
61
67
|
"ᠯ": "l"
|
62
68
|
"ᠮ": "m"
|
63
69
|
"ᠨ": "n"
|
64
|
-
"ᠥ": "o"
|
70
|
+
"ᠥ": "ö"
|
65
71
|
"ᠫ": "p"
|
66
72
|
"ᠴ": "q"
|
67
73
|
"ᠷ": "r"
|
@@ -0,0 +1,163 @@
|
|
1
|
+
---
|
2
|
+
authority_id: ungegn
|
3
|
+
id: 1972
|
4
|
+
language: nep
|
5
|
+
source_script: Deva
|
6
|
+
destination_script: Latn
|
7
|
+
name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES -- Nepali Romanization, 1972
|
8
|
+
url: https://www.eki.ee/wgrs/v3_0/rom1_ne.pdf
|
9
|
+
creation_date: 1972
|
10
|
+
confirmation_date: 2010
|
11
|
+
description: |
|
12
|
+
The United Nations recommended system was approved in 1972 (II/11), based on a report
|
13
|
+
prepared by D. N. Sharma. The note on the system was published in volume II of the
|
14
|
+
conference report.
|
15
|
+
|
16
|
+
There is no evidence of the use of the system either in Nepal or in international cartographic
|
17
|
+
products. The resolution IV/17 (1982) recommended association, inter alia, with Nepal in
|
18
|
+
carrying out further studies on the system. In Nepal a system of romanization is employed by
|
19
|
+
the Nepal Survey Department (NSD). This system has been applied to names appearing on
|
20
|
+
national mapping, and it is also used in the Geographic Information Infrastructure Programme
|
21
|
+
(NGIIP).
|
22
|
+
|
23
|
+
Nepali (Nepālī) uses the alphasyllabic script Devanāgarī.
|
24
|
+
|
25
|
+
notes:
|
26
|
+
|
27
|
+
- In normal spelling ṙ and ṙh are not marked, instead dotless ड ḍa, ढ ḍha are used to denote these sounds.
|
28
|
+
- |
|
29
|
+
Ligatures may be formed with r as the first component: ~ह rha.
|
30
|
+
|
31
|
+
tests:
|
32
|
+
- source: "लेखन"
|
33
|
+
expected: "laekhana"
|
34
|
+
- source: "मुद्रा"
|
35
|
+
expected: "maudaaraā"
|
36
|
+
- source: "प्रशंसा"
|
37
|
+
expected: "paarashaṁsaā"
|
38
|
+
- source: "अंक"
|
39
|
+
expected: "aṁka"
|
40
|
+
- source: "नेकपाले स्थगित स्थायी कमिटीको बैठक भदौ गते बोलाउने भएको"
|
41
|
+
expected: "naekapaālae saathagaita saathaāyaī kamaiṭaīkao baaiṭhaka bhadaau gatae baolaāunae bhaekao"
|
42
|
+
- source: "न घर रह्यो, न परिवार"
|
43
|
+
expected: "na ghara rahaayao, na paraivaāra"
|
44
|
+
- source: "ढोरपाटनमा भुजीखोला बाढीपहिरोले अभिभावक गुमाएका बालबालिकाको बिचल्ली"
|
45
|
+
expected: "ḍhaorapaāṭanamaā bhaujaīkhaolaā baāḍhaīpahairaolae abhaibhaāvaka gaumaāekaā baālabaālaikaākao baichalaalaī"
|
46
|
+
- source: "सुस्मिताका काका हेमबहादुर र काकीलाई पनि पहिरोले बगायो"
|
47
|
+
expected: "sausaamaitaākaā kaākaā haemabahaādaura ra kaākaīlaāī panai pahairaolae bagaāyao"
|
48
|
+
- source: "संविधान जारी भएसँगै सार्वजनिक प्रशासनमा नयाँ उत्साह आउने अपेक्षा थियो"
|
49
|
+
expected: "saṁvaidhaāna jaāraī bhaesam̐gaai saāraavajanaika paarashaāsanamaā nayaām̐ utaasaāha āunae apaekaaṣhaā thaiyao"
|
50
|
+
- source: "देशमा कोरोना संक्रमित र मृतकको संख्या हरेक दिन बढ्दो छ"
|
51
|
+
expected: "daeshamaā kaoraonaā saṁkaaramaita ra maṛtakakao saṁkhaayaā haraeka daina baḍhaadao chha"
|
52
|
+
- source: "गाउँपालिकाका अध्यक्ष टिका गुरुङका अनुसार विष्णुदासलाई राजुले सुत्नका लागि बेलुका साथी लगेका थिए"
|
53
|
+
expected: "gaāum̐paālaikaākaā adhaayakaaṣha ṭaikaā gaurauṅakaā anausaāra vaiṣhaaṇaudaāsalaāī raājaulae sautaanakaā laāgai baelaukaā saāthaī lagaekaā thaie"
|
54
|
+
- source: "यो आयोजना गाउँपालिकाको केन्द्र तेल्लोकमा पर्छ"
|
55
|
+
expected: "yao āyaojanaā gaāum̐paālaikaākao kaenaadaara taelaalaokamaā paraachha"
|
56
|
+
- source: "सुस्मिताका काका हेमबहादुर र काकीलाई पनि पहिरोले बगायो"
|
57
|
+
expected: "sausaamaitaākaā kaākaā haemabahaādaura ra kaākaīlaāī panai pahairaolae bagaāyao"
|
58
|
+
- source: "चैत पहिलो साता घर आएका उनी लकडाउन भएपछि यतै रोकिए"
|
59
|
+
expected: "chaaita pahailao saātaā ghara āekaā unaī lakaḍaāuna bhaepachhai yataai raokaie"
|
60
|
+
- source: "काम गर्न जानेको हकमा रोजगारदाता कम्पनीको पत्रसँगै वडा र जिल्ला प्रशासनको सिफारिस अनिवार्य गरिएको छ"
|
61
|
+
expected: "kaāma garaana jaānaekao hakamaā raojagaāradaātaā kamaapanaīkao pataarasam̐gaai vaḍaā ra jailaalaā paarashaāsanakao saiphaāraisa anaivaāraaya garaiekao chha"
|
62
|
+
|
63
|
+
map:
|
64
|
+
|
65
|
+
characters:
|
66
|
+
|
67
|
+
# Vowels and Diphthongs
|
68
|
+
'अ': 'a'
|
69
|
+
'आ': 'ā'
|
70
|
+
'इ': 'i'
|
71
|
+
'ई': 'ī'
|
72
|
+
'उ': 'u'
|
73
|
+
'ऊ': 'ū'
|
74
|
+
'ऋ': 'ṛ'
|
75
|
+
'ॠ': 'ṝ'
|
76
|
+
'ऌ': 'l̤'
|
77
|
+
'ए': 'e'
|
78
|
+
'ऐ': 'ai'
|
79
|
+
'ओ': 'o'
|
80
|
+
'औ': 'au'
|
81
|
+
|
82
|
+
# Medials # Needed for connecting consonants
|
83
|
+
|
84
|
+
'ा': "ā"
|
85
|
+
'ि': "i"
|
86
|
+
'ी': "ī"
|
87
|
+
'ु': "u"
|
88
|
+
'ू': "ū"
|
89
|
+
'ृ': "ṛ"
|
90
|
+
'े': "e"
|
91
|
+
'ै': "ai"
|
92
|
+
'ो': "o"
|
93
|
+
'ौ': "au"
|
94
|
+
|
95
|
+
# Consonants (see Note 1)
|
96
|
+
|
97
|
+
# Gutturals
|
98
|
+
'क': 'ka'
|
99
|
+
'ख': 'kha'
|
100
|
+
'ग': 'ga'
|
101
|
+
'घ': 'gha'
|
102
|
+
'ङ': 'ṅa'
|
103
|
+
|
104
|
+
# Palatals
|
105
|
+
'च': 'cha'
|
106
|
+
'छ': 'chha'
|
107
|
+
'ज': 'ja'
|
108
|
+
'झ': 'jha'
|
109
|
+
'ञ': 'ña'
|
110
|
+
|
111
|
+
# Cerebrals
|
112
|
+
'ट': 'ṭa'
|
113
|
+
'ठ': 'ṭha'
|
114
|
+
'ड': 'ḍa'
|
115
|
+
'ढ': 'ḍha'
|
116
|
+
'ण': 'ṇa'
|
117
|
+
|
118
|
+
# Dentals
|
119
|
+
'त': 'ta'
|
120
|
+
'थ': 'tha'
|
121
|
+
'द': 'da'
|
122
|
+
'ध': 'dha'
|
123
|
+
'न': 'na'
|
124
|
+
|
125
|
+
# Labials
|
126
|
+
'प': 'pa'
|
127
|
+
'फ': 'pha'
|
128
|
+
'ब': 'ba'
|
129
|
+
'भ': 'bha'
|
130
|
+
'म': 'ma'
|
131
|
+
|
132
|
+
# Semivowels
|
133
|
+
'य': 'ya'
|
134
|
+
'र': 'ra'
|
135
|
+
'ल': 'la'
|
136
|
+
'व': 'va' # or wa [Note#3]
|
137
|
+
|
138
|
+
# Sibilants
|
139
|
+
'श': 'sha'
|
140
|
+
'ष': 'ṣha'
|
141
|
+
'स': 'sa'
|
142
|
+
|
143
|
+
# Dotted variants
|
144
|
+
'क़': qa
|
145
|
+
'ख़': ḳha
|
146
|
+
'ग़': ga
|
147
|
+
'ज़': za
|
148
|
+
'ड़': ṙa
|
149
|
+
'ढ़': ṙha
|
150
|
+
'फ़': fa
|
151
|
+
|
152
|
+
|
153
|
+
# Aspirate
|
154
|
+
'ह': 'ha'
|
155
|
+
|
156
|
+
# Anusvāra
|
157
|
+
'ं': 'ṁ'
|
158
|
+
|
159
|
+
# Anunāsika
|
160
|
+
'ँ': 'm̐'
|
161
|
+
|
162
|
+
# halanta
|
163
|
+
'्': 'a'
|