interscript 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -0,0 +1,236 @@
1
+ ---
2
+ authority_id: un
3
+ id: 1967
4
+ language: iso-639-3:prs # prs stands for Dari (https://iso639-3.sil.org/code/prs)
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: Persian UN 1967
8
+ url: http://www.eki.ee/wgrs/v2_2/rom1_fa.htm
9
+ creation_date: 1967
10
+ confirmation_date: 01-2003
11
+ description: |
12
+ The United Nations recommended system was approved in 1967 (
13
+ I/13), based on the official system adopted by Iran and
14
+ published in its English version as Transliteration of
15
+ Farsi Geographic Names to Latin Alphabet (September 1966).
16
+ The romanization table was also published as an annex to
17
+ the Toponymic Guidelines for the Islamic Republic of Iran
18
+ in 2000 (Toponymic Guidelines for map and other editors –
19
+ Revised edition 1998. Submitted by the Islamic Republic of
20
+ Iran. UNGEGN, 20th session. New York, 17-28 January 2000,
21
+ Working Paper No. 41.).
22
+
23
+ The system is used in the Islamic Republic of Iran and in
24
+ international cartographic products.
25
+
26
+ Persian (Farsi) uses the Perso-Arabic script that is
27
+ written from right to left. The Persian script usually
28
+ omits vowel points and diacritical marks from writing which
29
+ makes it difficult to obtain uniform results in the
30
+ romanization of Persian. The romanization is generally
31
+ reversible though there are some ambiguous letter
32
+ sequences (kh, sh, th, zh) which also may represent the
33
+ romanized values of two Persian characters in addition to
34
+ the respective single ones.
35
+
36
+
37
+ notes:
38
+ - A Word-initially.
39
+ - B Not romanized; marks absence of the vowel.
40
+ - C Doubling of the consonant letter.
41
+ - D After a consonant (excl. -ah).
42
+ - E After a vowel (see also note 2).
43
+ - 1-The adjectival ending of Arabic origin -يه in Persian is
44
+ romanized -īyeh. In romanizing the definite article the
45
+ same rules of assimilation of consonants are applied as in
46
+ Arabic, e.g. زين الدين Zeyn od Dīn.
47
+
48
+ - 2-The relational suffix (eẕāfeh) -e is usually not
49
+ expressed in Persian writing after a consonant. After final
50
+ ا or و it is written with ى, e.g. پاى آب Pā-ye Āb. After
51
+ final ى and ه it is expressed by writing hamzeh over the
52
+ character دهانۀ ممبر Dahāneh-ye Mambar.
53
+
54
+ - 3-To point Persian vowels two systems are in use that are
55
+ separated by a column in the table. The first system is a
56
+ Persian one while the other adheres to the Arabic
57
+ tradition. In normal spelling vowel points are not used.
58
+
59
+ tests:
60
+ - source: اَنجِيرة
61
+ expected: Anjīrah
62
+
63
+ - source: اِيْوَانِي
64
+ expected: Eyvānī
65
+
66
+ - source: آبَادَان
67
+ expected: Ābādān
68
+
69
+ - source: قُرآن
70
+ expected: Qor’ān
71
+
72
+ - source: مَآب
73
+ expected: Ma’āb
74
+
75
+ - source: مُحَمَّد
76
+ expected: Moḩammad
77
+
78
+ - source: كُوهِ مَرغُوب
79
+ expected: Kūh-e Marghūb
80
+
81
+ - source: پَايِ آب
82
+ expected: Pā-ye Āb
83
+
84
+ - source: جُويِ آس
85
+ expected: Jū-ye Ās
86
+
87
+ - source: دَهَانِهٴ مَمبَر
88
+ expected: Dahāneh-ye Mambar
89
+
90
+ - source: سَلَسِيٴ بُذُرگ
91
+ expected: Salasī-ye Boz̄org
92
+
93
+ - source: ذُو الفَقَار
94
+ expected: Z̄ū ol Faqār
95
+
96
+
97
+ map:
98
+ postrules:
99
+ - pattern: (?<=\b)(?<!\b[‘|’|'|-])[\u0061-\uFFFF]
100
+ result: "upcase"
101
+
102
+ - pattern: " Al"
103
+ result: " al"
104
+
105
+ - pattern: " Ol"
106
+ result: " ol"
107
+
108
+ characters:
109
+
110
+ '\u064e' : 'a' # َ fatha
111
+ '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
112
+ '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
113
+ '\u0650' : 'e' # ِ kasra
114
+ '\u064f' : 'o' # ُ damma
115
+ '\u0652' : '' # ْ sokoon, see Note B
116
+
117
+ '\u064e\u0627' : 'ā' # ـَا fatha followed by ا
118
+ '\u064e\u0649' : 'á' # ـَى fatha followed by ى which is ا not ي
119
+ '\b\u0622' : 'ā' # آ NOTE A
120
+ '\u0622' : '’ā' # آ
121
+ '\u0650\u064a' : 'ī' # ـِي kasra followed by ي
122
+ '\u064f\u0648' : 'ū' # ـُو damma followed by و
123
+ '[\u064e|\u0650]\u064a\u0652' : 'ey' # ـَيْ
124
+ '[\u064e|\u064f]\u0648\u0652' : 'ow' # ـَوْ
125
+ '\u0621' : '’' # ء
126
+ '\u2013' : '–'
127
+ '\u2013[\u0649|\u064a]\u0647' : '-īyeh'
128
+ '[\u0654|\u0674]' : '-e' # ٴ ezafeh
129
+ '(?<=[\u064a|\u0647])[\u0654|\u0674]' : '-ye' # ٴ ezafeh
130
+ '\u0650\b' : '-e' # ِ kasra
131
+ '[\u064a|\u06cc]\u0650\b' : '-ye' # ِ kasra
132
+
133
+ # NOTE C
134
+ '\u0628\u0651' : 'bb' # ب
135
+ '\u062a\u0651' : 'tt' # ت
136
+ '\u062b\u0651' : 's̄s̄' # ث
137
+ '\u062c\u0651' : 'jj' # ج
138
+ '\u062d\u0651' : 'ḩḩ' # ح
139
+ '\u062e\u0651' : 'kh' # خ
140
+ '\u062f\u0651' : 'dd' # د
141
+ '\u0630\u0651' : 'z̄z̄' # ذ
142
+ '\u0631\u0651' : 'rr' # ر
143
+ '\u0632\u0651' : 'zz' # ز
144
+ '\u0633\u0651' : 'ss' # س
145
+ '\u0634\u0651' : 'sh' # ش
146
+ '\u0635\u0651' : 'şş' # ص
147
+ '\u0636\u0651' : 'ẕẕ' # ض
148
+ '\u0637\u0651' : 'ţţ' # ط
149
+ '\u0638\u0651' : 'z̧z̧' # ظ
150
+ '\u063a\u0651' : 'gh' # غ
151
+ '\u0641\u0651' : 'ff' # ف
152
+ '\u0642\u0651' : 'qq' # ق
153
+ '\u0643\u0651' : 'kk' # ك
154
+ '\u0644\u0651' : 'll' # ل
155
+ '\u0645\u0651' : 'mm' # م
156
+ '\u0646\u0651' : 'nn' # ن
157
+ '\u0647\u0651' : 'hh' # ه
158
+ '\u0648\u0651' : 'vv' # و
159
+ '\u064a\u0651' : 'yy' # ي
160
+
161
+ # NOTE 1
162
+ # Sun letters
163
+ '\b\u0627\u0644\u062a' : 'ot t' # الت
164
+ '\b\u0627\u0644\u062b' : 'os̄ s̄' # الث
165
+ '\b\u0627\u0644\u062f' : 'od d' # الد
166
+ '\b\u0627\u0644\u0630' : 'oz̄ z̄' # الذ
167
+ '\b\u0627\u0644\u0631' : 'or r' # الر
168
+ '\b\u0627\u0644\u0632' : 'oz z' # الز
169
+ '\b\u0627\u0644\u0633' : 'os s' # الس
170
+ '\b\u0627\u0644\u0634' : 'osh sh' # الش
171
+ '\b\u0627\u0644\u0635' : 'oş ş' # الص
172
+ '\b\u0627\u0644\u0636' : 'oẕ ẕ' # الض
173
+ '\b\u0627\u0644\u0637' : 'oţ ţ' # الط
174
+ '\b\u0627\u0644\u0638' : 'oz̧ z̧' # الظ
175
+ '\b\u0627\u0644\u0644' : 'ol l' # الل
176
+ '\b\u0627\u0644\u0646' : 'on n' # الن
177
+
178
+ '\u0650\u064a\u0651' : 'īy' # ـِيَّ
179
+ '\u0650\u064a(?=\u064e|\u064f)' : 'iy' # ـِي kasra followed by ي
180
+
181
+ # ta' marboota
182
+ '\u0629' : 'at' # ة in the middle of the sentence
183
+ '\u0629$' : 'ah'
184
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
185
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
186
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
187
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
188
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
189
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
190
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
191
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
192
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
193
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
194
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
195
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
196
+
197
+ '\b\u0627\u0644' : 'al ' # ال
198
+ '\s\b\u0627\u0644' : ' ol ' # ال #special Rule 1
199
+
200
+ '\b\u0627' : '' # ا initial
201
+ '\u0627' : 'ā' # ا medial
202
+ '\u0627\b' : 'ā' # ا final
203
+
204
+ '\u0628' : 'b' # ب
205
+ '\u067E' : 'p' # پ
206
+ '\u062A' : 't' # ت
207
+ '\u062B' : 's̄' # ث
208
+ '\u062C' : 'j' # ج
209
+ '\u0686' : 'ch' # ‫چ‬
210
+ '\u062D' : 'ḩ' # ح
211
+ '\u062E' : 'kh' # خ
212
+ '\u062F' : 'd' # د
213
+ '\u0630' : 'z̄' # ذ
214
+ '\u0631' : 'r' # ر
215
+ '\u0632' : 'z' # ز
216
+ '\u0698' : 'zh' # ‫ژ‬
217
+ '\u0633' : 's' # س
218
+ '\u0634' : 'sh' # ش
219
+ '\u0635' : 'ş' # ص
220
+ '\u0636' : 'ẕ' # ض
221
+ '\u0637' : 'ţ' # ط
222
+ '\u0638' : 'z̧' # ظ
223
+ '\u0639' : '’' # ع
224
+ '\u063A' : 'gh' # غ
225
+ '\u0641' : 'f' # ف
226
+ '\u0642' : 'q' # ق
227
+ '\u0643' : 'k' # ك
228
+ '\u06A9' : 'k' # ک
229
+ '\u06AF' : 'g' # گ
230
+ '\u0644' : 'l' # ل‫‬
231
+ '\u0645' : 'm' # م
232
+ '\u0646' : 'n' # ن
233
+ '\u0648' : 'v' # و
234
+ '\u0647' : 'h' # ه
235
+ '\u0649' : 'y' # ى
236
+ '\u064a' : 'y' # ي
@@ -0,0 +1,194 @@
1
+ ---
2
+ authority_id: un
3
+ id: 1972
4
+ language: iso-639-2:tam
5
+ source_script: Taml
6
+ destination_script: Latn
7
+ name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES --Tamil Romanization Version 4.0
8
+ url: https://www.eki.ee/wgrs/rom1_ta.htm
9
+ creation_date: 1972
10
+ confirmation_date: 2016
11
+ description: |
12
+ The United Nations recommended system was approved in 1972 (II/11) and amended in 1977 (III/12), based on a report prepared by
13
+ D. N. Sharma. The tables and their corrections were published in volume II of the conference reports.
14
+
15
+ There is no evidence of the use of the system either in India, Sri Lanka or in international cartographic products.
16
+
17
+ Tamil uses an alphasyllabic script whereby each character represents a syllable rather than one sound. Vowels and diphthongs
18
+ are marked in two ways: as independent characters (used syllable-initially) and in an abbreviated form, to denote vowels after
19
+ consonants. The romanization table is unambiguous. The system is on the whole reversible.
20
+
21
+ notes:
22
+
23
+ - |
24
+ ஜி Variation: டி ṭi.
25
+ - |
26
+ ஜீ Variation: டீ ṭī.
27
+ - |
28
+ ஜு Variations: கு ku, ஙு ṅu, சு chu, ஞு ñu, டு ṭu, ணு ṇu, து tu, நு nu, பு pu, மு mu, யு yu, ரு ru, லு lu, etc.
29
+ - |
30
+ ஜூ Variations: கூ kū, ஙூ ṅū, சூ chū, ஞூ ñū, டூ ṭū, ணூ ṇū, தூ tū, நூ nū, பூ pū, மூ mū, etc.
31
+ - |
32
+ ஃ Absent in the original table.
33
+ - |
34
+ ் Pronunciation without a vowel: க் k.
35
+ - |
36
+ ஜ, ஶ, ஸ, ன Characters borrowed from the Grantha script.
37
+ - |
38
+ ஸ Special syllable character: ஸ்ரீ srī.
39
+
40
+ tests:
41
+ - source: "அழிந்து போன நகரத்தில் , தொலைந்து போன நான்"
42
+ expected: "al̮intu poṉa nakarattil , tŏlaintu poṉa nāṉ"
43
+ - source: "முதன் முதலாக - மை ஃபர்ஸ்ட் சோலோ ட்ராவல்"
44
+ expected: "mutaṉ mutalāka - mai ḥparsṭ cholo ṭrāval"
45
+ - source: "வாழ்க்கையில் அவன் போன முதல் சோலோ டிரிப் அது தான்."
46
+ expected: "vāl̮kkaiyil avaṉ poṉa mutal cholo ṭirip atu tāṉ."
47
+ - source: "ஸ்கூல் ப்ரெண்ட் கார்த்திக் வீட்டுக்கு போய்ட்டு"
48
+ expected: "skūl prĕṇṭ kārttik vīṭṭukku poyṭṭu"
49
+ - source: "நாசா வெளியிட்ட வெடிக்கும் நட்சத்திரத்தின் வீடியோ"
50
+ expected: "nāchā vĕḷiyiṭṭa vĕṭikkum naṭchattirattiṉ vīṭiyo"
51
+ - source: "டார்பிடோவை ஏவ உதவும் சூப்பர்சானிக் ஏவுகணையான ஸ்மார்ட் சோதனை வெற்றி"
52
+ expected: "ṭārpiṭovai eva utavum chūpparchāṉik evukaṇaiyāṉa smārṭ chotaṉai vĕṟṟi"
53
+ - source: "இந்த ஆண்டு மருத்துவத்துக்கான நோபல் பரிசு பெறுபவர்களின் பெயர்கள் அறிவிப்பு"
54
+ expected: "inta āṇṭu maruttuvattukkāṉa nopal parichu pĕṟupavarkaḷiṉ pĕyarkaḷ aṟivippu"
55
+ - source: "மல்லையா விவகாரம்: பிரிட்டன் அரசின் நடவடிக்கைகள் தங்களுக்கு தெரியவில்லை - மத்திய அரசு தகவல்"
56
+ expected: "mallaiyā vivakāram: piriṭṭaṉ arachiṉ naṭavaṭikkaikaḷ taṅkaḷukku tĕriyavillai - mattiya arachu takaval"
57
+ - source: "ஆலோசனைக்குப் பிறகு தேனியில் இருந்து சென்னை புறப்பட்டார் துணை முதலமைச்சர் பன்னீர்செல்வம்"
58
+ expected: "ālochaṉaikkup piṟaku teṉiyil iruntu chĕṉṉai puṟappaṭṭār tuṇai mutalamaichchar paṉṉīrchĕlvam"
59
+ - source: "இன்று தான் பேரன் பிறந்தநாள் முடிந்து ஃப்ரீ ஆகி இருக்கிறேன்"
60
+ expected: "iṉṟu tāṉ peraṉ piṟantanāḷ muṭintu ḥprī āki irukkiṟeṉ"
61
+
62
+ map:
63
+
64
+ rules:
65
+ - pattern: ([க]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
66
+ result: 'k'
67
+ - pattern: ([ங]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
68
+ result: 'ṅ'
69
+ - pattern: ([ச]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
70
+ result: 'ch'
71
+ - pattern: ([ஞ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
72
+ result: 'ñ'
73
+ - pattern: ([ட]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
74
+ result: 'ṭ'
75
+ - pattern: ([ண]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
76
+ result: 'ṇ'
77
+ - pattern: ([த]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
78
+ result: 't'
79
+ - pattern: ([ந]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
80
+ result: 'n'
81
+ - pattern: ([ப]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
82
+ result: 'p'
83
+ - pattern: ([ம]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
84
+ result: 'm'
85
+ - pattern: ([ய]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
86
+ result: 'y'
87
+ - pattern: ([ர]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
88
+ result: 'r'
89
+ - pattern: ([ல]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
90
+ result: 'l'
91
+ - pattern: ([வ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
92
+ result: 'v'
93
+ - pattern: ([ழ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
94
+ result: 'l̮'
95
+ - pattern: ([ள]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
96
+ result: 'ḷ'
97
+ - pattern: ([ற]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
98
+ result: 'ṟ'
99
+ - pattern: ([ன]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
100
+ result: 'ṉ'
101
+ - pattern: ([ஜ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
102
+ result: 'j'
103
+ - pattern: ([ஶ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
104
+ result: 'sh'
105
+ - pattern: ([ஷ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
106
+ result: 'ṣh'
107
+ - pattern: ([ஸ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
108
+ result: 's'
109
+ - pattern: ([ஹ]=?)(?=[\u0bbe\u0bbf\u0bc0\u0bc1\u0bc2\u0bc6\u0bc7\u0bc8\u0bca\u0bcb\u0bcc\u25cc\u0bcd])
110
+ result: 'h'
111
+
112
+
113
+ characters:
114
+ 'அ': 'a'
115
+ 'ஆ': 'ā'
116
+ 'ா': 'ā'
117
+
118
+ 'இ': 'i'
119
+ 'ி': 'i'
120
+
121
+ 'ஈ': 'ī'
122
+ 'ீ': 'ī'
123
+
124
+ 'உ': 'u'
125
+ 'ு': 'u'
126
+
127
+ 'ஊ': 'ū'
128
+ 'ூ': 'ū'
129
+
130
+ 'ெ': "ĕ"
131
+ 'எ': 'ĕ'
132
+
133
+ 'ே': "e"
134
+ 'ஏ': 'e'
135
+
136
+ 'ஐ': 'ai'
137
+ 'ை': "ai"
138
+
139
+ 'ஒ': 'ŏ'
140
+ 'ொ': 'ŏ'
141
+
142
+ 'ோ': 'o'
143
+ 'ஓ': 'o'
144
+
145
+ 'ஔ': 'au'
146
+ 'ௌ': 'au'
147
+
148
+ 'ஃ': 'ḥ'
149
+ '்': ''
150
+
151
+ 'க': 'ka'
152
+ 'ங': 'ṅa'
153
+ 'ச': 'cha'
154
+ 'ஞ': 'ña'
155
+ 'ட': 'ṭa'
156
+ 'ண': 'ṇa'
157
+ 'த': 'ta'
158
+ 'ந': 'na'
159
+ 'ப': 'pa'
160
+ 'ம': 'ma'
161
+ 'ய': 'ya'
162
+ 'ர': 'ra'
163
+ 'ல': 'la'
164
+ 'வ': 'va'
165
+ 'ழ': 'l̮a'
166
+ 'ள': 'ḷa'
167
+ 'ற': 'ṟa'
168
+ 'ன': 'ṉa'
169
+ 'ஜ': 'ja'
170
+ 'ஶ': 'sha'
171
+ 'ஷ': 'ṣha'
172
+ 'ஸ': 'sa'
173
+ 'ஹ': 'ha'
174
+ 'க்ஷ': 'kṣha'
175
+ 'ற்ற': 'ṟṟa'
176
+ 'ன்ற': 'ṉṟa'
177
+
178
+ # Digits
179
+ '௧': '1'
180
+ '௨': '2'
181
+ '௩': '3'
182
+ '௪': '4'
183
+ '௫': '5'
184
+ '௬': '6'
185
+ '௭': '7'
186
+ '௮': '8'
187
+ '௯': '9'
188
+ '௦': '0'
189
+ '௰': '10'
190
+ '௱': '100'
191
+ '௲': '1000'
192
+
193
+ "‍": '' # no need for zero with joiner
194
+ "‌": '' # no need for zero with non joiner
@@ -0,0 +1,270 @@
1
+ ---
2
+ authority_id: un
3
+ id: 1972
4
+ language: iso-639-2:tel
5
+ source_script: Telu
6
+ destination_script: Latn
7
+ name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES -- Telugu Romanization, 1972
8
+ url: https://www.eki.ee/wgrs/rom1_te.htm
9
+ creation_date: 1972
10
+ confirmation_date: 2016
11
+ description: |
12
+ The United Nations recommended system was approved in 1972 (II/11) and amended in 1977 (III/12),
13
+ based on a report prepared by D. N. Sharma. The tables and their corrections were published in volume II
14
+ of the conference reports.
15
+
16
+ There is no evidence of the use of the system either in India or in international cartographic products.
17
+
18
+ Telugu uses an alphasyllabic script whereby each character represents a syllable rather than one sound.
19
+ Vowels and diphthongs are marked in two ways: as independent characters (used syllable-initially) and in
20
+ an abbreviated form, to denote vowels after consonants. The romanization table is unambiguous. The system is
21
+ mostly reversible but there may exist some ambiguities in the romanization of vowels (independent vs. abbreviated characters)
22
+ and consonants (combinations with subscript consonants vs. character sequences).
23
+
24
+ notes:
25
+
26
+ - |
27
+ Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
28
+ vowels following a consonant can be found in grammars; no distinction between the two is
29
+ made in transliteration.
30
+ - |
31
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
32
+ transliteration, with the following exceptions:
33
+ a) when another vowel is indicated by its appropriate sign; and
34
+ b) when the absence of any vowel is indicated by the superscript sign ( ్ ) called valapalagilaka.
35
+ - |
36
+ Exception: Sunna is transliterated by:
37
+ a) ṅ before gutturals,
38
+ b) ñ before palatals,
39
+ c) ṇ before cerebrals,
40
+ d) n before dentals, and
41
+ e) m before labials.
42
+ - |
43
+ Ardhasunna before gutturals and palatal, cerebral, and dental occlusives is transliterated n̐.
44
+ Before labials, sibilants, semivowels, the aspirate, vowels, and in final position it is
45
+ transliterated m̐.
46
+
47
+ tests:
48
+ - source: "తమిళనాడు"
49
+ expected: "tamiḷanāḍu"
50
+ - source: "తంటికొండ ఘటన: ఆగని మృత్యుఘోష"
51
+ expected: "taṃṭikŏṃḍa ghaṭana: āgani mṛtyughoṣha"
52
+ - source: "మళ్లీ వివాదం: అమితాబ్‌పై కేసు"
53
+ expected: "maḷlī vivādaṃ: amitābpai kesu"
54
+ - source: "వరద సాయం పేరుతో వైట్ కాలర్ దోపిడీ"
55
+ expected: "varada sāyaṃ peruto vaiṭ kālar dopiḍī"
56
+ - source: "రెండో విడత జీఎస్టీ పరిహారం"
57
+ expected: "rĕṃḍo viḍata jīĕsṭī parihāraṃ"
58
+ - source: "నితీష్‌ కుమార్‌ అధ్యాయం ముగిసినట్లేనా?!"
59
+ expected: "nitīṣh kumār adhyāyaṃ mugisinaṭlenā?!"
60
+ - source: "వారిపై జీవితాంతం నిషేధం విధించండి!"
61
+ expected: "vāripai jīvitāṃtaṃ niṣhedhaṃ vidhiṃchaṃḍi!"
62
+ - source: "మరో లాక్‌డౌన్‌ వల్ల అన్నీ అనర్థాలే!"
63
+ expected: "maro lākḍaun valla annī anarthāle!"
64
+ - source: "జెసిండా మరో సంచలనం"
65
+ expected: "jĕsiṃḍā maro saṃchalanaṃ"
66
+ - source: "స్వీయ నిర్బంధంలోకి డబ్ల్యూహెచ్‌ఓ డైరెక్టర్‌"
67
+ expected: "svīya nirbaṃdhaṃloki ḍablyūhĕcho ḍairĕkṭar"
68
+ - source: "కరోనాపై యుద్ధంలో సమిధలు"
69
+ expected: "karonāpai yuddhaṃlo samidhalu"
70
+ - source: "అమెరికా ఎన్నికలు: ‘పెద్దన్న’ ఎవరో?!"
71
+ expected: "amĕrikā ĕnnikalu: ‘pĕddanna’ ĕvaro?!"
72
+ - source: "౪౬౨౬౯"
73
+ expected: "46269"
74
+ - source: "రంగపూర్"
75
+ expected: "raṃgapūr"
76
+ # subscript consonant characters
77
+ - source: "ట్ట"
78
+ expected: "ṭṭa"
79
+ - source: "ప్ప"
80
+ expected: "ppa"
81
+ - source: "చ్చ"
82
+ expected: "chcha"
83
+
84
+
85
+ map:
86
+
87
+ rules:
88
+ - pattern: ([క]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
89
+ result: 'k'
90
+ - pattern: ([ఖ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
91
+ result: 'kh'
92
+ - pattern: ([గ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
93
+ result: 'g'
94
+ - pattern: ([ఘ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
95
+ result: 'gh'
96
+ - pattern: ([ఙ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
97
+ result: 'ṅ'
98
+ - pattern: ([చ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
99
+ result: 'ch'
100
+ - pattern: ([ఛ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
101
+ result: 'chh'
102
+ - pattern: ([జ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
103
+ result: 'j'
104
+ - pattern: ([ఝ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
105
+ result: 'jh'
106
+ - pattern: ([ఞ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
107
+ result: 'ñ'
108
+ - pattern: ([ట]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
109
+ result: 'ṭ'
110
+ - pattern: ([ఠ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
111
+ result: 'ṭh'
112
+ - pattern: ([డ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
113
+ result: 'ḍ'
114
+ - pattern: ([ఢ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
115
+ result: 'ḍh'
116
+ - pattern: ([ణ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
117
+ result: 'ṇ'
118
+ - pattern: ([త]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
119
+ result: 't'
120
+ - pattern: ([థ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
121
+ result: 'th'
122
+ - pattern: ([ద]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
123
+ result: 'd'
124
+ - pattern: ([ధ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
125
+ result: 'dh'
126
+ - pattern: ([న]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
127
+ result: 'n'
128
+ - pattern: ([ప]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
129
+ result: 'p'
130
+ - pattern: ([ఫ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
131
+ result: 'ph'
132
+ - pattern: ([బ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
133
+ result: 'b'
134
+ - pattern: ([భ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
135
+ result: 'bh'
136
+ - pattern: ([మ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
137
+ result: 'm'
138
+ - pattern: ([య]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
139
+ result: 'y'
140
+ - pattern: ([ర]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
141
+ result: 'r'
142
+ - pattern: ([ఱ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
143
+ result: 'r'
144
+ - pattern: ([ల]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
145
+ result: 'l'
146
+ - pattern: ([వ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
147
+ result: 'v'
148
+ - pattern: ([శ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
149
+ result: 'sh'
150
+ - pattern: ([ష]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
151
+ result: 'ṣh'
152
+ - pattern: ([స]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
153
+ result: 's'
154
+ - pattern: ([హ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
155
+ result: 'h'
156
+ - pattern: ([ళ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c\u0c4d])
157
+ result: 'ḷ'
158
+
159
+ characters:
160
+
161
+ # I. Independent vowel characters
162
+
163
+ 'అ': 'a'
164
+ 'ఆ': 'ā'
165
+ 'ఇ': 'i'
166
+ 'ఈ': 'ī'
167
+ 'ఉ': 'u'
168
+ 'ఊ': 'ū'
169
+ 'ఋ': 'ṛ'
170
+ 'ౠ': 'ṝ'
171
+ 'ఎ': 'ĕ'
172
+ 'ఏ': 'e'
173
+ 'ఐ': 'ai'
174
+ 'ఒ': 'ŏ'
175
+ 'ఓ': 'o'
176
+ 'ఔ': 'au'
177
+
178
+ # II. Abbreviated vowel characters and other symbols
179
+
180
+ 'ా': 'ā'
181
+ 'ి': 'i' # Variations: చి chi, ని ni, యి yi, లి li, etc.
182
+ 'ీ': 'ī'
183
+ 'ు': 'u' # Variations: జు ju, పు pu, వు vu, etc.
184
+ 'ూ': 'ū'
185
+ 'ృ': 'ṛ'
186
+ 'ె': 'ĕ'
187
+ 'ే': 'e'
188
+ 'ై': 'ai'
189
+ 'ొ': 'ŏ' # Variations: మొ mŏ, యొ yŏ.
190
+ 'ో': 'o' # Variations: మో mo, యో yo.
191
+ 'ౌ': 'au'
192
+ 'ঁ': 'ṁ'
193
+ 'ఁ': 'm̐' # Signified historic nasal sound, now obsolete.
194
+ 'ః': 'ḥ'
195
+ 'ం': 'ṃ'
196
+ '\u0c4d': '' # End-of-syllable mark (i.e., a consonant without a vowel): రంగపూర్ Raṁgapūr.
197
+
198
+ # III. Consonant characters
199
+
200
+ # Gutturals
201
+ 'క': 'ka'
202
+ 'ఖ': 'kha'
203
+ 'గ': 'ga'
204
+ 'ఘ': 'gha'
205
+ 'ఙ': 'ṅa'
206
+
207
+ # Palatals
208
+ 'చ': 'cha'
209
+ 'ఛ': 'chha'
210
+ 'జ': 'ja'
211
+ 'ఝ': 'jha'
212
+ 'ఞ': 'ña'
213
+
214
+ # Cerebrals
215
+ 'ట': 'ṭa'
216
+ 'ఠ': 'ṭha'
217
+ 'డ': 'ḍa'
218
+ 'ఢ': 'ḍha'
219
+ 'ణ': 'ṇa'
220
+
221
+ # Dentals
222
+ 'త': 'ta'
223
+ 'థ': 'tha'
224
+ 'ద': 'da'
225
+ 'ధ': 'dha'
226
+ 'న': 'na'
227
+
228
+ # Labials
229
+ 'ప': 'pa'
230
+ 'ఫ': 'pha'
231
+ 'బ': 'ba'
232
+ 'భ': 'bha'
233
+ 'మ': 'ma'
234
+
235
+ # Semivowels
236
+ 'య': 'ya'
237
+ 'ర': 'ra' #Variant: ఱ.
238
+ 'ఱ': 'ra'
239
+ 'ల': 'la'
240
+ 'వ': 'va'
241
+
242
+ # Sibilants
243
+ 'శ': 'sha'
244
+ 'ష': 'ṣha'
245
+ 'స': 'sa'
246
+
247
+ # Aspirate
248
+ 'హ': 'ha'
249
+ 'ళ' : 'ḷa'
250
+
251
+ '\u09CD': '' # Used for joining
252
+ 'ౕ ': ''
253
+ 'ౖ ': ''
254
+ '्': ''
255
+ '़': ''
256
+ "‍": ''# Used for joining
257
+ "‌": ''# Used for non joining
258
+
259
+ # numbers
260
+
261
+ '౦': '0'
262
+ '౧': '1'
263
+ '౨': '2'
264
+ '౩': '3'
265
+ '౪': '4'
266
+ '౫': '5'
267
+ '౬': '6'
268
+ '౭': '7'
269
+ '౮': '8'
270
+ '౯': '9'