interscript 0.1.7 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -0,0 +1,236 @@
1
+ ---
2
+ authority_id: un
3
+ id: 1967
4
+ language: iso-639-3:prs # prs stands for Dari (https://iso639-3.sil.org/code/prs)
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: Persian UN 1967
8
+ url: http://www.eki.ee/wgrs/v2_2/rom1_fa.htm
9
+ creation_date: 1967
10
+ confirmation_date: 01-2003
11
+ description: |
12
+ The United Nations recommended system was approved in 1967 (
13
+ I/13), based on the official system adopted by Iran and
14
+ published in its English version as Transliteration of
15
+ Farsi Geographic Names to Latin Alphabet (September 1966).
16
+ The romanization table was also published as an annex to
17
+ the Toponymic Guidelines for the Islamic Republic of Iran
18
+ in 2000 (Toponymic Guidelines for map and other editors –
19
+ Revised edition 1998. Submitted by the Islamic Republic of
20
+ Iran. UNGEGN, 20th session. New York, 17-28 January 2000,
21
+ Working Paper No. 41.).
22
+
23
+ The system is used in the Islamic Republic of Iran and in
24
+ international cartographic products.
25
+
26
+ Persian (Farsi) uses the Perso-Arabic script that is
27
+ written from right to left. The Persian script usually
28
+ omits vowel points and diacritical marks from writing which
29
+ makes it difficult to obtain uniform results in the
30
+ romanization of Persian. The romanization is generally
31
+ reversible though there are some ambiguous letter
32
+ sequences (kh, sh, th, zh) which also may represent the
33
+ romanized values of two Persian characters in addition to
34
+ the respective single ones.
35
+
36
+
37
+ notes:
38
+ - A Word-initially.
39
+ - B Not romanized; marks absence of the vowel.
40
+ - C Doubling of the consonant letter.
41
+ - D After a consonant (excl. -ah).
42
+ - E After a vowel (see also note 2).
43
+ - 1-The adjectival ending of Arabic origin -يه in Persian is
44
+ romanized -īyeh. In romanizing the definite article the
45
+ same rules of assimilation of consonants are applied as in
46
+ Arabic, e.g. زين الدين Zeyn od Dīn.
47
+
48
+ - 2-The relational suffix (eẕāfeh) -e is usually not
49
+ expressed in Persian writing after a consonant. After final
50
+ ا or و it is written with ى, e.g. پاى آب Pā-ye Āb. After
51
+ final ى and ه it is expressed by writing hamzeh over the
52
+ character دهانۀ ممبر Dahāneh-ye Mambar.
53
+
54
+ - 3-To point Persian vowels two systems are in use that are
55
+ separated by a column in the table. The first system is a
56
+ Persian one while the other adheres to the Arabic
57
+ tradition. In normal spelling vowel points are not used.
58
+
59
+ tests:
60
+ - source: اَنجِيرة
61
+ expected: Anjīrah
62
+
63
+ - source: اِيْوَانِي
64
+ expected: Eyvānī
65
+
66
+ - source: آبَادَان
67
+ expected: Ābādān
68
+
69
+ - source: قُرآن
70
+ expected: Qor’ān
71
+
72
+ - source: مَآب
73
+ expected: Ma’āb
74
+
75
+ - source: مُحَمَّد
76
+ expected: Moḩammad
77
+
78
+ - source: كُوهِ مَرغُوب
79
+ expected: Kūh-e Marghūb
80
+
81
+ - source: پَايِ آب
82
+ expected: Pā-ye Āb
83
+
84
+ - source: جُويِ آس
85
+ expected: Jū-ye Ās
86
+
87
+ - source: دَهَانِهٴ مَمبَر
88
+ expected: Dahāneh-ye Mambar
89
+
90
+ - source: سَلَسِيٴ بُذُرگ
91
+ expected: Salasī-ye Boz̄org
92
+
93
+ - source: ذُو الفَقَار
94
+ expected: Z̄ū ol Faqār
95
+
96
+
97
+ map:
98
+ postrules:
99
+ - pattern: (?<=\b)(?<!\b[‘|’|'|-])[\u0061-\uFFFF]
100
+ result: "upcase"
101
+
102
+ - pattern: " Al"
103
+ result: " al"
104
+
105
+ - pattern: " Ol"
106
+ result: " ol"
107
+
108
+ characters:
109
+
110
+ '\u064e' : 'a' # َ fatha
111
+ '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
112
+ '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
113
+ '\u0650' : 'e' # ِ kasra
114
+ '\u064f' : 'o' # ُ damma
115
+ '\u0652' : '' # ْ sokoon, see Note B
116
+
117
+ '\u064e\u0627' : 'ā' # ـَا fatha followed by ا
118
+ '\u064e\u0649' : 'á' # ـَى fatha followed by ى which is ا not ي
119
+ '\b\u0622' : 'ā' # آ NOTE A
120
+ '\u0622' : '’ā' # آ
121
+ '\u0650\u064a' : 'ī' # ـِي kasra followed by ي
122
+ '\u064f\u0648' : 'ū' # ـُو damma followed by و
123
+ '[\u064e|\u0650]\u064a\u0652' : 'ey' # ـَيْ
124
+ '[\u064e|\u064f]\u0648\u0652' : 'ow' # ـَوْ
125
+ '\u0621' : '’' # ء
126
+ '\u2013' : '–'
127
+ '\u2013[\u0649|\u064a]\u0647' : '-īyeh'
128
+ '[\u0654|\u0674]' : '-e' # ٴ ezafeh
129
+ '(?<=[\u064a|\u0647])[\u0654|\u0674]' : '-ye' # ٴ ezafeh
130
+ '\u0650\b' : '-e' # ِ kasra
131
+ '[\u064a|\u06cc]\u0650\b' : '-ye' # ِ kasra
132
+
133
+ # NOTE C
134
+ '\u0628\u0651' : 'bb' # ب
135
+ '\u062a\u0651' : 'tt' # ت
136
+ '\u062b\u0651' : 's̄s̄' # ث
137
+ '\u062c\u0651' : 'jj' # ج
138
+ '\u062d\u0651' : 'ḩḩ' # ح
139
+ '\u062e\u0651' : 'kh' # خ
140
+ '\u062f\u0651' : 'dd' # د
141
+ '\u0630\u0651' : 'z̄z̄' # ذ
142
+ '\u0631\u0651' : 'rr' # ر
143
+ '\u0632\u0651' : 'zz' # ز
144
+ '\u0633\u0651' : 'ss' # س
145
+ '\u0634\u0651' : 'sh' # ش
146
+ '\u0635\u0651' : 'şş' # ص
147
+ '\u0636\u0651' : 'ẕẕ' # ض
148
+ '\u0637\u0651' : 'ţţ' # ط
149
+ '\u0638\u0651' : 'z̧z̧' # ظ
150
+ '\u063a\u0651' : 'gh' # غ
151
+ '\u0641\u0651' : 'ff' # ف
152
+ '\u0642\u0651' : 'qq' # ق
153
+ '\u0643\u0651' : 'kk' # ك
154
+ '\u0644\u0651' : 'll' # ل
155
+ '\u0645\u0651' : 'mm' # م
156
+ '\u0646\u0651' : 'nn' # ن
157
+ '\u0647\u0651' : 'hh' # ه
158
+ '\u0648\u0651' : 'vv' # و
159
+ '\u064a\u0651' : 'yy' # ي
160
+
161
+ # NOTE 1
162
+ # Sun letters
163
+ '\b\u0627\u0644\u062a' : 'ot t' # الت
164
+ '\b\u0627\u0644\u062b' : 'os̄ s̄' # الث
165
+ '\b\u0627\u0644\u062f' : 'od d' # الد
166
+ '\b\u0627\u0644\u0630' : 'oz̄ z̄' # الذ
167
+ '\b\u0627\u0644\u0631' : 'or r' # الر
168
+ '\b\u0627\u0644\u0632' : 'oz z' # الز
169
+ '\b\u0627\u0644\u0633' : 'os s' # الس
170
+ '\b\u0627\u0644\u0634' : 'osh sh' # الش
171
+ '\b\u0627\u0644\u0635' : 'oş ş' # الص
172
+ '\b\u0627\u0644\u0636' : 'oẕ ẕ' # الض
173
+ '\b\u0627\u0644\u0637' : 'oţ ţ' # الط
174
+ '\b\u0627\u0644\u0638' : 'oz̧ z̧' # الظ
175
+ '\b\u0627\u0644\u0644' : 'ol l' # الل
176
+ '\b\u0627\u0644\u0646' : 'on n' # الن
177
+
178
+ '\u0650\u064a\u0651' : 'īy' # ـِيَّ
179
+ '\u0650\u064a(?=\u064e|\u064f)' : 'iy' # ـِي kasra followed by ي
180
+
181
+ # ta' marboota
182
+ '\u0629' : 'at' # ة in the middle of the sentence
183
+ '\u0629$' : 'ah'
184
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
185
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
186
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
187
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
188
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
189
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
190
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
191
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
192
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
193
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
194
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
195
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
196
+
197
+ '\b\u0627\u0644' : 'al ' # ال
198
+ '\s\b\u0627\u0644' : ' ol ' # ال #special Rule 1
199
+
200
+ '\b\u0627' : '' # ا initial
201
+ '\u0627' : 'ā' # ا medial
202
+ '\u0627\b' : 'ā' # ا final
203
+
204
+ '\u0628' : 'b' # ب
205
+ '\u067E' : 'p' # پ
206
+ '\u062A' : 't' # ت
207
+ '\u062B' : 's̄' # ث
208
+ '\u062C' : 'j' # ج
209
+ '\u0686' : 'ch' # ‫چ‬
210
+ '\u062D' : 'ḩ' # ح
211
+ '\u062E' : 'kh' # خ
212
+ '\u062F' : 'd' # د
213
+ '\u0630' : 'z̄' # ذ
214
+ '\u0631' : 'r' # ر
215
+ '\u0632' : 'z' # ز
216
+ '\u0698' : 'zh' # ‫ژ‬
217
+ '\u0633' : 's' # س
218
+ '\u0634' : 'sh' # ش
219
+ '\u0635' : 'ş' # ص
220
+ '\u0636' : 'ẕ' # ض
221
+ '\u0637' : 'ţ' # ط
222
+ '\u0638' : 'z̧' # ظ
223
+ '\u0639' : '’' # ع
224
+ '\u063A' : 'gh' # غ
225
+ '\u0641' : 'f' # ف
226
+ '\u0642' : 'q' # ق
227
+ '\u0643' : 'k' # ك
228
+ '\u06A9' : 'k' # ک
229
+ '\u06AF' : 'g' # گ
230
+ '\u0644' : 'l' # ل‫‬
231
+ '\u0645' : 'm' # م
232
+ '\u0646' : 'n' # ن
233
+ '\u0648' : 'v' # و
234
+ '\u0647' : 'h' # ه
235
+ '\u0649' : 'y' # ى (alef maksura)
236
+ '\u064a' : 'y' # ي
@@ -0,0 +1,194 @@
1
+ ---
2
+ authority_id: un
3
+ id: 1972
4
+ language: iso-639-2:tam
5
+ source_script: Taml
6
+ destination_script: Latn
7
+ name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES --Tamil Romanization Version 4.0
8
+ url: https://www.eki.ee/wgrs/rom1_ta.htm
9
+ creation_date: 1972
10
+ confirmation_date: 2016
11
+ description: |
12
+ The United Nations recommended system was approved in 1972 (II/11) and amended in 1977 (III/12), based on a report prepared by
13
+ D. N. Sharma. The tables and their corrections were published in volume II of the conference reports.
14
+
15
+ There is no evidence of the use of the system either in India, Sri Lanka or in international cartographic products.
16
+
17
+ Tamil uses an alphasyllabic script whereby each character represents a syllable rather than one sound. Vowels and diphthongs
18
+ are marked in two ways: as independent characters (used syllable-initially) and in an abbreviated form, to denote vowels after
19
+ consonants. The romanization table is unambiguous. The system is on the whole reversible.
20
+
21
+ notes:
22
+
23
+ - |
24
+ ஜி Variation: டி ṭi.
25
+ - |
26
+ ஜீ Variation: டீ ṭī.
27
+ - |
28
+ ஜு Variations: கு ku, ஙு ṅu, சு chu, ஞு ñu, டு ṭu, ணு ṇu, து tu, நு nu, பு pu, மு mu, யு yu, ரு ru, லு lu, etc.
29
+ - |
30
+ ஜூ Variations: கூ kū, ஙூ ṅū, சூ chū, ஞூ ñū, டூ ṭū, ணூ ṇū, தூ tū, நூ nū, பூ pū, மூ mū, etc.
31
+ - |
32
+ ஃ Absent in the original table.
33
+ - |
34
+ ் Pronunciation without a vowel: க் k.
35
+ - |
36
+ ஜ, ஶ, ஸ, ன Characters borrowed from the Grantha script.
37
+ - |
38
+ ஸ Special syllable character: ஸ்ரீ srī.
39
+
40
+ tests:
41
+ - source: "அழிந்து போன நகரத்தில் , தொலைந்து போன நான்"
42
+ expected: "al̮intu poṉa nakarattil , tŏlaintu poṉa nāṉ"
43
+ - source: "முதன் முதலாக - மை ஃபர்ஸ்ட் சோலோ ட்ராவல்"
44
+ expected: "mutaṉ mutalāka - mai ḥparsṭ cholo ṭrāval"
45
+ - source: "வாழ்க்கையில் அவன் போன முதல் சோலோ டிரிப் அது தான்."
46
+ expected: "vāl̮kkaiyil avaṉ poṉa mutal cholo ṭirip atu tāṉ."
47
+ - source: "ஸ்கூல் ப்ரெண்ட் கார்த்திக் வீட்டுக்கு போய்ட்டு"
48
+ expected: "skūl prĕṇṭ kārttik vīṭṭukku poyṭṭu"
49
+ - source: "நாசா வெளியிட்ட வெடிக்கும் நட்சத்திரத்தின் வீடியோ"
50
+ expected: "nāchā vĕḷiyiṭṭa vĕṭikkum naṭchattirattiṉ vīṭiyo"
51
+ - source: "டார்பிடோவை ஏவ உதவும் சூப்பர்சானிக் ஏவுகணையான ஸ்மார்ட் சோதனை வெற்றி"
52
+ expected: "ṭārpiṭovai eva utavum chūpparchāṉik evukaṇaiyāṉa smārṭ chotaṉai vĕṟṟi"
53
+ - source: "இந்த ஆண்டு மருத்துவத்துக்கான நோபல் பரிசு பெறுபவர்களின் பெயர்கள் அறிவிப்பு"
54
+ expected: "inta āṇṭu maruttuvattukkāṉa nopal parichu pĕṟupavarkaḷiṉ pĕyarkaḷ aṟivippu"
55
+ - source: "மல்லையா விவகாரம்: பிரிட்டன் அரசின் நடவடிக்கைகள் தங்களுக்கு தெரியவில்லை - மத்திய அரசு தகவல்"
56
+ expected: "mallaiyā vivakāram: piriṭṭaṉ arachiṉ naṭavaṭikkaikaḷ taṅkaḷukku tĕriyavillai - mattiya arachu takaval"
57
+ - source: "ஆலோசனைக்குப் பிறகு தேனியில் இருந்து சென்னை புறப்பட்டார் துணை முதலமைச்சர் பன்னீர்செல்வம்"
58
+ expected: "ālochaṉaikkup piṟaku teṉiyil iruntu chĕṉṉai puṟappaṭṭār tuṇai mutalamaichchar paṉṉīrchĕlvam"
59
+ - source: "இன்று தான் பேரன் பிறந்தநாள் முடிந்து ஃப்ரீ ஆகி இருக்கிறேன்"
60
+ expected: "iṉṟu tāṉ peraṉ piṟantanāḷ muṭintu ḥprī āki irukkiṟeṉ"
61
+
62
+ map:
63
+
64
+ rules:
65
+ - pattern: ([க]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
66
+ result: 'k'
67
+ - pattern: ([ங]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
68
+ result: 'ṅ'
69
+ - pattern: ([ச]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
70
+ result: 'ch'
71
+ - pattern: ([ஞ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
72
+ result: 'ñ'
73
+ - pattern: ([ட]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
74
+ result: 'ṭ'
75
+ - pattern: ([ண]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
76
+ result: 'ṇ'
77
+ - pattern: ([த]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
78
+ result: 't'
79
+ - pattern: ([ந]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
80
+ result: 'n'
81
+ - pattern: ([ப]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
82
+ result: 'p'
83
+ - pattern: ([ம]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
84
+ result: 'm'
85
+ - pattern: ([ய]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
86
+ result: 'y'
87
+ - pattern: ([ர]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
88
+ result: 'r'
89
+ - pattern: ([ல]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
90
+ result: 'l'
91
+ - pattern: ([வ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
92
+ result: 'v'
93
+ - pattern: ([ழ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
94
+ result: 'l̮'
95
+ - pattern: ([ள]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
96
+ result: 'ḷ'
97
+ - pattern: ([ற]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
98
+ result: 'ṟ'
99
+ - pattern: ([ன]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
100
+ result: 'ṉ'
101
+ - pattern: ([ஜ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
102
+ result: 'j'
103
+ - pattern: ([ஶ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
104
+ result: 'sh'
105
+ - pattern: ([ஷ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
106
+ result: 'ṣh'
107
+ - pattern: ([ஸ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
108
+ result: 's'
109
+ - pattern: ([ஹ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
110
+ result: 'h'
111
+
112
+
113
+ characters:
114
+ 'அ': 'a'
115
+ 'ஆ': 'ā'
116
+ 'ா': 'ā'
117
+
118
+ 'இ': 'i'
119
+ 'ி': 'i'
120
+
121
+ 'ஈ': 'ī'
122
+ 'ீ': 'ī'
123
+
124
+ 'உ': 'u'
125
+ 'ு': 'u'
126
+
127
+ 'ஊ': 'ū'
128
+ 'ூ': 'ū'
129
+
130
+ 'ெ': "ĕ"
131
+ 'எ': 'ĕ'
132
+
133
+ 'ே': "e"
134
+ 'ஏ': 'e'
135
+
136
+ 'ஐ': 'ai'
137
+ 'ை': "ai"
138
+
139
+ 'ஒ': 'ŏ'
140
+ 'ொ': 'ŏ'
141
+
142
+ 'ோ': 'o'
143
+ 'ஓ': 'o'
144
+
145
+ 'ஔ': 'au'
146
+ 'ௌ': 'au'
147
+
148
+ 'ஃ': 'ḥ'
149
+ '்': ''
150
+
151
+ 'க': 'ka'
152
+ 'ங': 'ṅa'
153
+ 'ச': 'cha'
154
+ 'ஞ': 'ña'
155
+ 'ட': 'ṭa'
156
+ 'ண': 'ṇa'
157
+ 'த': 'ta'
158
+ 'ந': 'na'
159
+ 'ப': 'pa'
160
+ 'ம': 'ma'
161
+ 'ய': 'ya'
162
+ 'ர': 'ra'
163
+ 'ல': 'la'
164
+ 'வ': 'va'
165
+ 'ழ': 'l̮a'
166
+ 'ள': 'ḷa'
167
+ 'ற': 'ṟa'
168
+ 'ன': 'ṉa'
169
+ 'ஜ': 'ja'
170
+ 'ஶ': 'sha'
171
+ 'ஷ': 'ṣha'
172
+ 'ஸ': 'sa'
173
+ 'ஹ': 'ha'
174
+ 'க்ஷ': 'kṣha'
175
+ 'ற்ற': 'ṟṟa'
176
+ 'ன்ற': 'ṉṟa'
177
+
178
+ # Digits
179
+ '௧': '1'
180
+ '௨': '2'
181
+ '௩': '3'
182
+ '௪': '4'
183
+ '௫': '5'
184
+ '௬': '6'
185
+ '௭': '7'
186
+ '௮': '8'
187
+ '௯': '9'
188
+ '௦': '0'
189
+ '௰': '10'
190
+ '௱': '100'
191
+ '௲': '1000'
192
+
193
+ "‍": '' # no need for zero with joiner
194
+ "‌": '' # no need for zero with non joiner
@@ -0,0 +1,270 @@
1
+ ---
2
+ authority_id: un
3
+ id: 1972
4
+ language: iso-639-2:tel
5
+ source_script: Telu
6
+ destination_script: Latn
7
+ name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES -- Telugu Romanization, 1972
8
+ url: https://www.eki.ee/wgrs/rom1_te.htm
9
+ creation_date: 1972
10
+ confirmation_date: 2016
11
+ description: |
12
+ The United Nations recommended system was approved in 1972 (II/11) and amended in 1977 (III/12),
13
+ based on a report prepared by D. N. Sharma. The tables and their corrections were published in volume II
14
+ of the conference reports.
15
+
16
+ There is no evidence of the use of the system either in India or in international cartographic products.
17
+
18
+ Telugu uses an alphasyllabic script whereby each character represents a syllable rather than one sound.
19
+ Vowels and diphthongs are marked in two ways: as independent characters (used syllable-initially) and in
20
+ an abbreviated form, to denote vowels after consonants. The romanization table is unambiguous. The system is
21
+ mostly reversible but there may exist some ambiguities in the romanization of vowels (independent vs. abbreviated characters)
22
+ and consonants (combinations with subscript consonants vs. character sequences).
23
+
24
+ notes:
25
+
26
+ - |
27
+ Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
28
+ vowels following a consonant can be found in grammars; no distinction between the two is
29
+ made in transliteration.
30
+ - |
31
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
32
+ transliteration, with the following exceptions:
33
+ a) when another vowel is indicated by its appropriate sign; and
34
+ b) when the absence of any vowel is indicated by the superscript sign ( ్ , U+0C4D) called valapalagilaka.
35
+ - |
36
+ Exception: Sunna is transliterated by:
37
+ a) ṅ before gutturals,
38
+ b) ñ before palatals,
39
+ c) ṇ before cerebrals,
40
+ d) n before dentals, and
41
+ e) m before labials.
42
+ - |
43
+ Ardhasunna before gutturals and palatal, cerebral, and dental occlusives is transliterated n̐.
44
+ Before labials, sibilants, semivowels, the aspirate, vowels, and in final position it is
45
+ transliterated m̐.
46
+
47
+ tests:
48
+ - source: "తమిళనాడు"
49
+ expected: "tamiḷanāḍu"
50
+ - source: "తంటికొండ ఘటన: ఆగని మృత్యుఘోష"
51
+ expected: "taṃṭikŏṃḍa ghaṭana: āgani mṛtyughoṣha"
52
+ - source: "మళ్లీ వివాదం: అమితాబ్‌పై కేసు"
53
+ expected: "maḷlī vivādaṃ: amitābpai kesu"
54
+ - source: "వరద సాయం పేరుతో వైట్ కాలర్ దోపిడీ"
55
+ expected: "varada sāyaṃ peruto vaiṭ kālar dopiḍī"
56
+ - source: "రెండో విడత జీఎస్టీ పరిహారం"
57
+ expected: "rĕṃḍo viḍata jīĕsṭī parihāraṃ"
58
+ - source: "నితీష్‌ కుమార్‌ అధ్యాయం ముగిసినట్లేనా?!"
59
+ expected: "nitīṣh kumār adhyāyaṃ mugisinaṭlenā?!"
60
+ - source: "వారిపై జీవితాంతం నిషేధం విధించండి!"
61
+ expected: "vāripai jīvitāṃtaṃ niṣhedhaṃ vidhiṃchaṃḍi!"
62
+ - source: "మరో లాక్‌డౌన్‌ వల్ల అన్నీ అనర్థాలే!"
63
+ expected: "maro lākḍaun valla annī anarthāle!"
64
+ - source: "జెసిండా మరో సంచలనం"
65
+ expected: "jĕsiṃḍā maro saṃchalanaṃ"
66
+ - source: "స్వీయ నిర్బంధంలోకి డబ్ల్యూహెచ్‌ఓ డైరెక్టర్‌"
67
+ expected: "svīya nirbaṃdhaṃloki ḍablyūhĕcho ḍairĕkṭar"
68
+ - source: "కరోనాపై యుద్ధంలో సమిధలు"
69
+ expected: "karonāpai yuddhaṃlo samidhalu"
70
+ - source: "అమెరికా ఎన్నికలు: ‘పెద్దన్న’ ఎవరో?!"
71
+ expected: "amĕrikā ĕnnikalu: ‘pĕddanna’ ĕvaro?!"
72
+ - source: "౪౬౨౬౯"
73
+ expected: "46269"
74
+ - source: "రంగపూర్"
75
+ expected: "raṃgapūr"
76
+ # subscript consonant characters
77
+ - source: "ట్ట"
78
+ expected: "ṭṭa"
79
+ - source: "ప్ప"
80
+ expected: "ppa"
81
+ - source: "చ్చ"
82
+ expected: "chcha"
83
+
84
+
85
+ map:
86
+
87
+ rules:
88
+ - pattern: ([క]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
89
+ result: 'k'
90
+ - pattern: ([ఖ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
91
+ result: 'kh'
92
+ - pattern: ([గ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
93
+ result: 'g'
94
+ - pattern: ([ఘ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
95
+ result: 'gh'
96
+ - pattern: ([ఙ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
97
+ result: 'ṅ'
98
+ - pattern: ([చ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
99
+ result: 'ch'
100
+ - pattern: ([ఛ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
101
+ result: 'chh'
102
+ - pattern: ([జ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
103
+ result: 'j'
104
+ - pattern: ([ఝ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
105
+ result: 'jh'
106
+ - pattern: ([ఞ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
107
+ result: 'ñ'
108
+ - pattern: ([ట]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
109
+ result: 'ṭ'
110
+ - pattern: ([ఠ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
111
+ result: 'ṭh'
112
+ - pattern: ([డ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
113
+ result: 'ḍ'
114
+ - pattern: ([ఢ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
115
+ result: 'ḍh'
116
+ - pattern: ([ణ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
117
+ result: 'ṇ'
118
+ - pattern: ([త]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
119
+ result: 't'
120
+ - pattern: ([థ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
121
+ result: 'th'
122
+ - pattern: ([ద]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
123
+ result: 'd'
124
+ - pattern: ([ధ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
125
+ result: 'dh'
126
+ - pattern: ([న]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
127
+ result: 'n'
128
+ - pattern: ([ప]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
129
+ result: 'p'
130
+ - pattern: ([ఫ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
131
+ result: 'ph'
132
+ - pattern: ([బ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
133
+ result: 'b'
134
+ - pattern: ([భ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
135
+ result: 'bh'
136
+ - pattern: ([మ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
137
+ result: 'm'
138
+ - pattern: ([య]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
139
+ result: 'y'
140
+ - pattern: ([ర]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
141
+ result: 'r'
142
+ - pattern: ([ఱ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
143
+ result: 'r'
144
+ - pattern: ([ల]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
145
+ result: 'l'
146
+ - pattern: ([వ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
147
+ result: 'v'
148
+ - pattern: ([శ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
149
+ result: 'sh'
150
+ - pattern: ([ష]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
151
+ result: 'ṣh'
152
+ - pattern: ([స]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
153
+ result: 's'
154
+ - pattern: ([హ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
155
+ result: 'h'
156
+ - pattern: ([ళ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
157
+ result: 'ḷ'
158
+
159
+ characters:
160
+
161
+ # I. Independent vowel characters
162
+
163
+ 'అ': 'a'
164
+ 'ఆ': 'ā'
165
+ 'ఇ': 'i'
166
+ 'ఈ': 'ī'
167
+ 'ఉ': 'u'
168
+ 'ఊ': 'ū'
169
+ 'ఋ': 'ṛ'
170
+ 'ౠ': 'ṝ'
171
+ 'ఎ': 'ĕ'
172
+ 'ఏ': 'e'
173
+ 'ఐ': 'ai'
174
+ 'ఒ': 'ŏ'
175
+ 'ఓ': 'o'
176
+ 'ఔ': 'au'
177
+
178
+ # II. Abbreviated vowel characters and other symbols
179
+
180
+ 'ా': 'ā'
181
+ 'ి': 'i' # Variations: చి chi, ని ni, యి yi, లి li, etc.
182
+ 'ీ': 'ī'
183
+ 'ు': 'u' # Variations: జు ju, పు pu, వు vu, etc.
184
+ 'ూ': 'ū'
185
+ 'ృ': 'ṛ'
186
+ 'ె': 'ĕ'
187
+ 'ే': 'e'
188
+ 'ై': 'ai'
189
+ 'ొ': 'ŏ' # Variations: మొ mŏ, యొ yŏ.
190
+ 'ో': 'o' # Variations: మో mo, యో yo.
191
+ 'ౌ': 'au'
192
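+ 'ঁ': 'ṁ' # NOTE(review): this key appears to be U+0981 (Bengali candrabindu), not a Telugu sign - confirm it is intended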
193
+ 'ఁ': 'm̐' # Signified historic nasal sound, now obsolete.
194
+ 'ః': 'ḥ'
195
+ 'ం': 'ṃ'
196
+ '\u0c4d': '' # End-of-syllable mark (i.e., a consonant without a vowel): రంగపూర్ Raṁgapūr.
197
+
198
+ # III. Consonant characters
199
+
200
+ # Gutturals
201
+ 'క': 'ka'
202
+ 'ఖ': 'kha'
203
+ 'గ': 'ga'
204
+ 'ఘ': 'gha'
205
+ 'ఙ': 'ṅa'
206
+
207
+ # Palatals
208
+ 'చ': 'cha'
209
+ 'ఛ': 'chha'
210
+ 'జ': 'ja'
211
+ 'ఝ': 'jha'
212
+ 'ఞ': 'ña'
213
+
214
+ # Cerebrals
215
+ 'ట': 'ṭa'
216
+ 'ఠ': 'ṭha'
217
+ 'డ': 'ḍa'
218
+ 'ఢ': 'ḍha'
219
+ 'ణ': 'ṇa'
220
+
221
+ # Dentals
222
+ 'త': 'ta'
223
+ 'థ': 'tha'
224
+ 'ద': 'da'
225
+ 'ధ': 'dha'
226
+ 'న': 'na'
227
+
228
+ # Labials
229
+ 'ప': 'pa'
230
+ 'ఫ': 'pha'
231
+ 'బ': 'ba'
232
+ 'భ': 'bha'
233
+ 'మ': 'ma'
234
+
235
+ # Semivowels
236
+ 'య': 'ya'
237
+ 'ర': 'ra' #Variant: ఱ.
238
+ 'ఱ': 'ra'
239
+ 'ల': 'la'
240
+ 'వ': 'va'
241
+
242
+ # Sibilants
243
+ 'శ': 'sha'
244
+ 'ష': 'ṣha'
245
+ 'స': 'sa'
246
+
247
+ # Aspirate
248
+ 'హ': 'ha'
249
+ 'ళ' : 'ḷa'
250
+
251
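+ '\u09CD': '' # U+09CD is the Bengali virama (hasanta); NOTE(review): the Telugu virama U+0C4D is already mapped earlier in this file - confirm this entry is intended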
252
+ 'ౕ ': ''
253
+ 'ౖ ': ''
254
+ '्': ''
255
+ '़': ''
256
+ "‍": ''# Used for joining
257
+ "‌": ''# Used for non joining
258
+
259
+ # numbers
260
+
261
+ '౦': '0'
262
+ '౧': '1'
263
+ '౨': '2'
264
+ '౩': '3'
265
+ '౪': '4'
266
+ '౫': '5'
267
+ '౬': '6'
268
+ '౭': '7'
269
+ '౮': '8'
270
+ '౯': '9'