interscript 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -0,0 +1,229 @@
1
+ ---
2
+ authority_id: un
3
+ id: 1972
4
+ language: iso-639-2:guj
5
+ source_script: Gujr
6
+ destination_script: Latn
7
+ name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES --Gujarati Romanization Version 4.0
8
+ url: https://www.eki.ee/wgrs/rom1_gu.htm
9
+ creation_date: 1972
10
+ confirmation_date: 2016
11
+ description: |
12
+ The United Nations recommended system was approved in 1972 (II/11) and amended in 1977 (III/12),
13
+ based on a report prepared by D. N. Sharma. The tables and their corrections were published in
14
+ volume II of the conference reports.
15
+
16
+ There is no evidence of the use of the system either in India or in international cartographic products.
17
+
18
+ Gujarati (Gujarātī) uses an alphasyllabic script whereby each character represents a syllable
19
+ rather than one sound. Vowels and diphthongs are marked in two ways: as independent characters
20
+ (used syllable-initially) and in an abbreviated form, to denote vowels after consonants.
21
+ The romanization table is unambiguous but the user would have to recognize many ligatures not
22
+ given in the original table. For two vowel letters (e/ĕ, o/ŏ) there is uncertainty as to the
23
+ conditions in which either of the two variants is to be used. The system is mostly reversible
24
+ but there exist some ambiguities in the romanization of vowels (independent vs. abbreviated characters)
25
+ and consonants (ligatures vs. character sequences).
26
+
27
+ References
28
+
29
+ Second United Nations Conference on the Standardization of Geographical Names. London, 10–31 May 1972.
30
+ Vol. II. Technical papers. United Nations. New York 1974, pp. 134–135.
31
+
32
+ Third United Nations Conference on the Standardization of Geographical Names.
33
+ Athens, 17 August – 7 September 1977. Vol. II, Technical papers, pp. 393 etc.
34
+
35
+ notes:
36
+
37
+ - |
38
+ Exception of ી : જી jī.
39
+ - |
40
+ Exception of ૂ : રૂ rū.
41
+ - |
42
+ ્‌ indicates pronunciation without a vowel.
43
+
44
+ tests:
45
+ - source: "અમિત શાહનો કોરોના રિપોર્ટ ૨ ઓગસ્ટે પોઝિટિવ આવ્યો હતો, ત્યારથી તેમનું સ્વાસ્થ્ય સારું નથી"
46
+ expected: "amita shāhanŏ kŏrŏnā ripŏrṭa 2 ŏgasṭĕ pŏjhiṭiva āvyŏ hatŏ, tyārathī tĕmanuṁ svāsthya sāruṁ nathī"
47
+ - source: "મેદાંતા હોસ્પિટલમાં તેમનો ઇલાજ ચાલી રહ્યો હતો"
48
+ expected: "mĕdāṁtā hŏspiṭalamāṁ tĕmanŏ ilāja chālī rahyŏ hatŏ"
49
+ - source: "ભારતના વિશ્વનાથન આનંદે શેનયાનમાં પહેલો ફિડે શતરંજ વિશ્વ કપ જીત્યો"
50
+ expected: "bhāratanā vishvanāthana ānaṁdĕ shĕnayānamāṁ pahĕlŏ fiḍĕ shataraṁja vishva kapa jītyŏ"
51
+ - source: "ભારતીય વડા પ્રધાન જવાહરલાલ નેહરુએ ૪૦ લાખ હિન્દુઓ અને મુસલમાનોના પારસ્પરિક સ્થાનાંતરણનું સૂચન આપ્યું"
52
+ expected: "bhāratīya vaḍā pradhāna javāharalāla nĕharuĕ 40 lākha hinduŏ anĕ musalamānŏnā pārasparika sthānāṁtaraṇanuṁ sūchana āpyuṁ"
53
+ - source: "લિબિયાના એલ અજિજિયામાં ધરતી પર સૌથી વધુ તાપમાન નોંધાયું. એ વખતે છાયામાં નોંધવામાં આવેલું તાપમાન ૫૮ ડિગ્રી સેલ્સિયસ હતું."
54
+ expected: "libiyānā ĕla ajijiyāmāṁ dharatī para sauthī vadhu tāpamāna nŏṁdhāyuṁ. ĕ vakhatĕ chhāyāmāṁ nŏṁdhavāmāṁ āvĕluṁ tāpamāna 58 ḍigrī sĕlsiyasa hatuṁ."
55
+ - source: "પ્રથમ વિશ્વયુદ્ધઃ જર્મની અને ફ્રાન્સ વચ્ચે એસ્નેની લડાઈ શરૂ થઈ હતી"
56
+ expected: "prathama vishvayuddhaḥ jarmanī anĕ frānsa vachchĕ ĕsnĕnī laḍāī sharū thaī hatī"
57
+ - source: "એન્ગ્લો-મિસ્ત્ર યુદ્ધઃ તેલ અલ કેબિરનું યુદ્ધ લડવામાં આવ્યું હતું."
58
+ expected: "ĕnglŏ-mistra yuddhaḥ tĕla ala kĕbiranuṁ yuddha laḍavāmāṁ āvyuṁ hatuṁ."
59
+ - source: "પુરાવા ન હતા, એ જ કારણે કેસ ચાલ્યો નહીં, પણ તેમને નજરકેદ રાખવામાં આવ્યા"
60
+ expected: "purāvā na hatā, ĕ ja kāraṇĕ kĕsa chālyŏ nahīṁ, paṇa tĕmanĕ najarakĕda rākhavāmāṁ āvyā"
61
+ - source: "સરદાર પટેલે નક્કી કર્યું હતું કે કાશ્મીર ભારતનો હિસ્સો બનશે; ૯૧ વર્ષ પહેલાં લાહોર જેલમાં ભૂખહડતાળ દરમિયાન શહીદ થયા હતા જતીન દાસ"
62
+ expected: "saradāra paṭĕlĕ nakkī karyuṁ hatuṁ kĕ kāshmīra bhāratanŏ hissŏ banashĕ; 91 varṣha pahĕlāṁ lāhŏra jĕlamāṁ bhūkhahaḍatāḷa daramiyāna shahīda thayā hatā jatīna dāsa"
63
+ - source: "કોરોના પ્રોટોકોલ વચ્ચે આજે મેડિકલ પ્રવેશ પરીક્ષા લેવાશેઃ એન્ટ્રી ટચ ફ્રી રહેશે, એડમિટ કાર્ડ બાર કોડથી ચેક થશે"
64
+ expected: "kŏrŏnā prŏṭŏkŏla vachchĕ ājĕ mĕḍikala pravĕsha parīkṣhā lĕvāshĕḥ ĕnṭrī ṭacha frī rahĕshĕ, ĕḍamiṭa kārḍa bāra kŏḍathī chĕka thashĕ"
65
+ - source: "અલ્ ક઼`ઇદ્ માં હવામાન"
66
+ expected: "al ka`id māṁ havāmāna"
67
+ - source: "મંત્રાલય તથા ખ઼.ય ના વિ૨ષ્ઠ અધિકા૨ીઓ ઉપસ્થિત ૨હ્યા હતા"
68
+ expected: "maṁtrālaya tathā kha.ya nā vi2ṣhṭha adhikā2īŏ upasthita 2hyā hatā"
69
+
70
+ map:
71
+
72
+ rules:
73
+ - pattern: ([ક]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
74
+ result: 'k'
75
+ - pattern: ([ખ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
76
+ result: 'kh'
77
+ - pattern: ([ગ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
78
+ result: 'g'
79
+ - pattern: ([ઘ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
80
+ result: 'gh'
81
+ - pattern: ([ઙ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
82
+ result: 'ṅ'
83
+ - pattern: ([ચ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
84
+ result: 'ch'
85
+ - pattern: ([છ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
86
+ result: 'chh'
87
+ - pattern: ([જ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
88
+ result: 'j'
89
+ - pattern: ([ઝ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
90
+ result: 'jh'
91
+ - pattern: ([ઞ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
92
+ result: 'ñ'
93
+ - pattern: ([ટ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
94
+ result: 'ṭ'
95
+ - pattern: ([ઠ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
96
+ result: 'ṭh'
97
+ - pattern: ([ડ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
98
+ result: 'ḍ'
99
+ - pattern: ([ઢ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
100
+ result: 'ḍh'
101
+ - pattern: ([ણ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
102
+ result: 'ṇ'
103
+ - pattern: ([ત]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
104
+ result: 't'
105
+ - pattern: ([થ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
106
+ result: 'th'
107
+ - pattern: ([દ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
108
+ result: 'd'
109
+ - pattern: ([ધ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
110
+ result: 'dh'
111
+ - pattern: ([ન]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
112
+ result: 'n'
113
+ - pattern: ([પ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
114
+ result: 'p'
115
+ - pattern: ([ફ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
116
+ result: 'f'
117
+ - pattern: ([બ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
118
+ result: 'b'
119
+ - pattern: ([ભ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
120
+ result: 'bh'
121
+ - pattern: ([મ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
122
+ result: 'm'
123
+ - pattern: ([ય]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
124
+ result: 'y'
125
+ - pattern: ([ર]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
126
+ result: 'r'
127
+ - pattern: ([લ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
128
+ result: 'l'
129
+ - pattern: ([વ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
130
+ result: 'v'
131
+ - pattern: ([શ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
132
+ result: 'sh'
133
+ - pattern: ([ષ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
134
+ result: 'ṣh'
135
+ - pattern: ([સ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
136
+ result: 's'
137
+ - pattern: ([હ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
138
+ result: 'h'
139
+ - pattern: ([ળ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
140
+ result: 'ḷ'
141
+
142
+ characters:
143
+ # I. Independent vowel characters
144
+ 'અ': 'a'
145
+ 'આ': 'ā'
146
+ 'ઇ': 'i'
147
+ 'ઈ': 'ī'
148
+ 'ઉ': 'u'
149
+ 'ઊ': 'ū'
150
+ 'ઋ': 'ṛ'
151
+ 'ઍ': 'e'
152
+ 'એ': 'ĕ'
153
+ 'ઐ': 'ai'
154
+ 'ઑ': 'o'
155
+ 'ઓ': 'ŏ'
156
+ 'ઔ': 'au'
157
+
158
+ # II. Abbreviated vowel characters
159
+ 'ા': 'ā'
160
+ 'િ': 'i'
161
+ 'ી': 'ī'
162
+ 'ુ': 'u'
163
+ 'ૂ': 'ū'
164
+ 'ૃ': 'ṛ'
165
+ 'ૅ': 'e'
166
+ 'ે': 'ĕ'
167
+ 'ૈ': 'ai'
168
+ 'ૉ': 'o'
169
+ 'ો': 'ŏ'
170
+ 'ૌ': 'au'
171
+
172
+ # III. Other symbols
173
+ 'ઃ': 'ḥ'
174
+ 'ં': 'ṁ'
175
+ 'ઁ': 'm̐'
176
+ '્': ''
177
+
178
+ # IV. Consonant characters
179
+ 'ક': 'ka'
180
+ 'ખ': 'kha'
181
+ 'ગ': 'ga'
182
+ 'ઘ': 'gha'
183
+ 'ઙ': 'ṅa'
184
+ 'ચ': 'cha'
185
+ 'છ': 'chha'
186
+ 'જ': 'ja'
187
+ 'ઝ': 'jha'
188
+ 'ઞ': 'ña'
189
+ 'ટ': 'ṭa'
190
+ 'ઠ': 'ṭha'
191
+ 'ડ': 'ḍa'
192
+ 'ઢ': 'ḍha'
193
+ 'ણ': 'ṇa'
194
+ 'ત': 'ta'
195
+ 'થ': 'tha'
196
+ 'દ': 'da'
197
+ 'ધ': 'dha'
198
+ 'ન': 'na'
199
+ 'પ': 'pa'
200
+ 'ફ': 'fa'
201
+ 'બ': 'ba'
202
+ 'ભ': 'bha'
203
+ 'મ': 'ma'
204
+ 'ય': 'ya'
205
+ 'ર': 'ra'
206
+ 'લ': 'la'
207
+ 'વ': 'va'
208
+ 'શ': 'sha'
209
+ 'ષ': 'ṣha'
210
+ 'સ': 'sa'
211
+ 'હ': 'ha'
212
+ 'ળ': 'ḷa'
213
+
214
+ # digits
215
+ '૦': '0'
216
+ '૧': '1'
217
+ '૨': '2'
218
+ '૩': '3'
219
+ '૪': '4'
220
+ '૫': '5'
221
+ '૬': '6'
222
+ '૭': '7'
223
+ '૮': '8'
224
+ '૯': '9'
225
+
226
+ '઼': ''
227
+ '।': '.'
228
+ '\u09CD': '' # Used for joining
229
+ "‍": '' # Used for joining
@@ -30,34 +30,128 @@ notes:
30
30
  It is recommended that the vowel अ (a) should always be romanized except when it ends a
31
31
  name. If a name ends with a consonant, the consonant should carry a sub-macron. Such
32
32
  cases, however, will be very rare. For example, कानपुर Kānapur (not Kānapura), जगत्
33
- Jagaṯ.
33
+ Jagat.
34
34
  - |
35
35
  If each letter of a digraph or any two parts of a trigraph has a distinct independent sound
36
36
  then it should be indicated by a hyphen, thus d-h.
37
+
38
+ ######################################## Additional Note ############################################################
39
+ # It is recommended that the vowel अ (a) should always be romanized except when it ends a #
40
+ # name. This scheme cannot yet detect whether a word is a name. Analysing the language pattern and the #
41
+ # examples, we found that a word ending with a consonant should be transliterated without its vowel, #
42
+ # so we apply that rule to every word: when a word ends with a consonant, the ending 'a' is removed. #
43
+ #####################################################################################################################
37
44
 
38
45
  tests:
46
+ - source: "दिल्ली"
47
+ expected: "dillī"
48
+ - source: "भारत"
49
+ expected: "bhārat"
39
50
  - source: "परिपक्क"
40
- expected: "paraipakka"
51
+ expected: "paripakk"
41
52
  - source: "जगत्"
42
53
  expected: "jagat"
43
54
  - source: "संख्या"
44
- expected: "saṁkhyaā"
55
+ expected: "saṁkhyā"
45
56
  - source: "गंभीर मरीजों के मामले में भारत दूसरे नंबर पर"
46
- expected: "gaṁbhaīra maraījaoṁ kae maāmalae maeṁ bhaārata daūsarae naṁbara para"
57
+ expected: "gaṁbhīr marījoṁ ke māmale meṁ bhārat dūsare naṁbar par"
47
58
  - source: "कोरोना अपडेट्स"
48
- expected: "kaoraonaā apaḍaeṭsa"
59
+ expected: "koronā apaḍeṭs"
49
60
  - source: "सीडीसी चीफ का बयान अहम"
50
- expected: "saīḍaīsaī chaīpha kaā bayaāna ahama"
61
+ expected: "sīḍīsī chīph bayān aham"
51
62
  - source: "गूगल प्ले स्टोर पर पेटीएम की वापसी"
52
- expected: "gaūgala plae sṭaora para paeṭaīema kaī vaāpasaī"
63
+ expected: "gūgal ple sṭor par peṭīem vāpasī"
53
64
  - source: "भारत में गैंबलिंग की इजाजत नहीं"
54
- expected: "bhaārata maeṁ gaaiṁbalaiṁga kaī ijaājata nahaīṁ"
65
+ expected: "bhārat meṁ gaiṁbaliṁg ijājat nahīṁ"
55
66
  - source: "कोरोना वैक्सीन मुद्दे पर घिरे राष्ट्रपति; जो बाइडेन बोले- मुझे और देश को वैज्ञानिकों पर भरोसा है, डोनाल्ड ट्रम्प पर नहीं"
56
- expected: "kaoraonaā vaaiksaīna mauddae para ghairae raāṣhṭrapatai; jao baāiḍaena baolae- maujhae aura daesha kao vaaijñaānaikaoṁ para bharaosaā haai, ḍaonaālḍa ṭrampa para nahaīṁ"
67
+ expected: "koronā vaiksīn mudde par ghire rāṣhṭrapati; jo bāiḍen bole- mujhe aur desh ko vaijñānikoṁ par bharosā hai, ḍonālḍ ṭramp par nahīṁ"
57
68
  - source: "गूगल की कार्रवाई पर पेटीएम ने कहा था कि ऐप को अस्थायी तौर पर प्ले-स्टोर से हटाया गया है, आपके पैसे सुरक्षित हैं"
58
- expected: "gaūgala kaī kaārravaāī para paeṭaīema nae kahaā thaā kai aipa kao asthaāyaī taaura para plae-sṭaora sae haṭaāyaā gayaā haai, āpakae paaisae saurakṣhaita haaiṁ"
69
+ expected: "gūgal kārravāī par peṭīem ne kahā thā ki aip ko asthāyī taur par ple-sṭor se haṭāyā gayā hai, āpake paise surakṣhit haiṁ"
70
+
59
71
  map:
60
72
 
73
+ rules:
74
+ - pattern: (([क]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([क])(?=\b))
75
+ result: 'k'
76
+ - pattern: (([ख]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ख])(?=\b))
77
+ result: 'kh'
78
+ - pattern: (([ग]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ग])(?=\b))
79
+ result: 'g'
80
+ - pattern: (([घ]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([घ])(?=\b))
81
+ result: 'gh'
82
+ - pattern: (([ङ]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ङ])(?=\b))
83
+ result: 'ṅ'
84
+ - pattern: (([च]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([च])(?=\b))
85
+ result: 'ch'
86
+ - pattern: (([छ]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([छ])(?=\b))
87
+ result: 'chh'
88
+ - pattern: (([ज]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ज])(?=\b))
89
+ result: 'j'
90
+ - pattern: (([झ]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([झ])(?=\b))
91
+ result: 'jh'
92
+ - pattern: (([ञ]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ञ])(?=\b))
93
+ result: 'ñ'
94
+ - pattern: (([ट]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ट])(?=\b))
95
+ result: 'ṭ'
96
+ - pattern: (([ठ]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ठ])(?=\b))
97
+ result: 'ṭh'
98
+ - pattern: (([ड]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ड])(?=\b))
99
+ result: 'ḍ'
100
+ - pattern: (([ढ]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ढ])(?=\b))
101
+ result: 'ḍh'
102
+ - pattern: (([ण]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ण])(?=\b))
103
+ result: 'ṇ'
104
+ - pattern: (([त]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([त])(?=\b))
105
+ result: 't'
106
+ - pattern: (([थ]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([थ])(?=\b))
107
+ result: 'th'
108
+ - pattern: (([द]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([द])(?=\b))
109
+ result: 'd'
110
+ - pattern: (([ध]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ध])(?=\b))
111
+ result: 'dh'
112
+ - pattern: (([न]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([न])(?=\b))
113
+ result: 'n'
114
+ - pattern: (([प]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([प])(?=\b))
115
+ result: 'p'
116
+ - pattern: (([फ]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([फ])(?=\b))
117
+ result: 'ph'
118
+ - pattern: (([ब]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ब])(?=\b))
119
+ result: 'b'
120
+ - pattern: (([भ]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([भ])(?=\b))
121
+ result: 'bh'
122
+ - pattern: (([म]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([म])(?=\b))
123
+ result: 'm'
124
+ - pattern: (([य]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([य])(?=\b))
125
+ result: 'y'
126
+ - pattern: (([र]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([र])(?=\b))
127
+ result: 'r'
128
+ - pattern: (([ल]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ल])(?=\b))
129
+ result: 'l'
130
+ - pattern: (([व]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([व])(?=\b))
131
+ result: 'v'
132
+ - pattern: (([श]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([श])(?=\b))
133
+ result: 'sh'
134
+ - pattern: (([ष]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ष])(?=\b))
135
+ result: 'ṣh'
136
+ - pattern: (([स]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([स])(?=\b))
137
+ result: 's'
138
+ - pattern: (([क़]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([क़])(?=\b))
139
+ result: 'q'
140
+ - pattern: (([ख़]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ख़])(?=\b))
141
+ result: 'ḳh'
142
+ - pattern: (([ग़]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ग़])(?=\b))
143
+ result: 'g'
144
+ - pattern: (([ज़]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ज़])(?=\b))
145
+ result: 'z'
146
+ - pattern: (([ड़]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ड़])(?=\b))
147
+ result: 'ṙ'
148
+ - pattern: (([ढ़]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ढ़])(?=\b))
149
+ result: 'ṙh'
150
+ - pattern: (([फ़]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([फ़])(?=\b))
151
+ result: 'f'
152
+ - pattern: (([ह]=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d]))|(([ह])(?=\b))
153
+ result: 'h'
154
+
61
155
  characters:
62
156
 
63
157
  # I. Independent vowel characters
@@ -0,0 +1,254 @@
1
+ ---
2
+ authority_id: ungegn
3
+ id: 2016
4
+ language: iso-639-2:kan
5
+ source_script: Kana
6
+ destination_script: Latn
7
+ name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES -- Kannada Romanization, 1972
8
+ url: https://www.eki.ee/wgrs/rom1_kn.pdf
9
+ creation_date: 1972
10
+ confirmation_date: 2016
11
+ description: |
12
+ The United Nations recommended system was approved in 1972 (II/11) and amended in 1977
13
+ (III/12), based on a report prepared by D. N. Sharma. The tables and their corrections were
14
+ published in volume II of the conference reports.
15
+
16
+ There is no evidence of the use of the system either in India or in international cartographic
17
+ products.
18
+
19
+ Kannada uses an alphasyllabic script whereby each character represents a syllable rather than
20
+ one sound. Vowels and diphthongs are marked in two ways: as independent characters (used
21
+ syllable-initially) and in an abbreviated form, to denote vowels after consonants. The
22
+ romanization table is unambiguous. The system is mostly reversible but there may exist some
23
+ ambiguities in the romanization of vowels (independent vs. abbreviated characters) and
24
+ consonants (combinations with subscript consonants vs. character sequences).
25
+
26
+ notes:
27
+ - |
28
+ While most consonants have a diacritic in the upper right corner of the character (like ಕ),
29
+ some do not, e.g. ಖ kha, ಜ ja, ನ na, ಬ ba, ಲ la.
30
+ - |
31
+ Combinations with r as the first component are written by adding a special symbol after the
32
+ second consonant: ರ್ಗ rga.
33
+
34
+
35
+ tests:
36
+ - source: "ಕರ್ಣಾಟಕ"
37
+ expected: "karṇāṭaka"
38
+ - source: "ಬೆಂಗಳೂರು"
39
+ expected: "bĕṁgaḷūru"
40
+ - source: "ಮಹಾರಾಷ್ಟ್ರದ ಯಾವುದೇ ಪ್ರಕರಣದ ತನಿಖೆಗೆ ಇನ್ನು ಸಿಬಿಐ ಪಡೆಯಬೇಕು ಅನುಮತಿ"
41
+ expected: "mahārāṣhṭrada yāvude prakaraṇada tanikhĕgĕ innu sibiai paḍĕyabeku anumati"
42
+ - source: "ಹರಕು ಬಾಯಿ: ಈಶ್ವರಪ್ಪಗೆ ಶಾಸಕ ಯತ್ನಾಳ ತಿರುಗೇಟು"
43
+ expected: "haraku bāyi: īshvarappagĕ shāsaka yatnāḷa tirugeṭu"
44
+ - source: "ಹಾಥರಸ್‌ ಪ್ರಕರಣ: ೨೯ರಂದು ರಾಷ್ಟ್ರವ್ಯಾಪಿ ಪ್ರತಿಭಟನೆಗೆ ಮಹಿಳಾ ಸಂಘಟನೆಗಳ ಕರೆ"
45
+ expected: "hātharas prakaraṇa: 29raṁdu rāṣhṭravyāpi pratibhaṭanĕgĕ mahiḷā saṁghaṭanĕgaḷa karĕ"
46
+ - source: "ಪೊಲೀಸ್‌ ಮಕ್ಕಳ ಶಾಲೆ ಮುಚ್ಚುವ ಯತ್ನಕ್ಕೆ ಹೊರಟ್ಟಿ ತೀವ್ರ ವಿರೋಧ"
47
+ expected: "pŏlīs makkaḷa shālĕ muchchuva yatnakkĕ hŏraṭṭi tīvra virodha"
48
+ - source: "ಅಮೆರಿಕ ಅಧ್ಯಕ್ಷೀಯ ಚುನಾವಣೆ: ಟ್ರಂಪ್‌–ಬೈಡನ್‌ ಅಂತಿಮ ಮುಖಾಮುಖಿಗೆ ವೇದಿಕೆ ಸಿದ್ಧ"
49
+ expected: "amĕrika adhyakṣhīya chunāvaṇĕ: ṭraṁp–baiḍan aṁtima mukhāmukhigĕ vedikĕ siddha"
50
+ - source: "ಅಂಜನಾದ್ರಿ ಆಂಜನೇಯನ ದರ್ಶನ ಪಡೆದ ಪವರ್ ಸ್ಟಾರ್ ಪುನೀತ್ ರಾಜ್ ಕುಮಾರ್"
51
+ expected: "aṁjanādri āṁjaneyana darshana paḍĕda pavar sṭār punīt rāj kumār"
52
+ - source: "ಇನ್ನು ಹಿಂದೂ ದೇವಸ್ಥಾನದ ಧಾರ್ಮಿಕ ಕಾರ್ಯದಲ್ಲಿ ಭಾಗಿಯಾಗಿದ್ದಕ್ಕೆ ಮೋಯಿದ್ದೀನ್ ಬಾವಾಗೆ ಬೆದರಿಕೆ ಒಡ್ಡಲಾಗಿದೆ"
53
+ expected: "innu hiṁdū devasthānada dhārmika kāryadalli bhāgiyāgiddakkĕ moyiddīn bāvāgĕ bĕdarikĕ ŏḍḍalāgidĕ"
54
+ - source: "ಇದು ಮೋದಿ ದೇಶ - ದನ ತಿಂದು ಹೋದ್ರೆ ಹುಷಾರ್ : ದೇಗುಲಕ್ಕೆ ಹೋಗಿದ್ದ ಬಾವಾಗೆ ಬೆದರಿಕೆ"
55
+ expected: "idu modi desha - dana tiṁdu hodrĕ huṣhār : degulakkĕ hogidda bāvāgĕ bĕdarikĕ"
56
+
57
+ map:
58
+
59
+ rules:
60
+ # to cover consonants followed by a vowel diacritic or a virama (vowel-less consonants)
61
+ - pattern: ([ಕ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
62
+ result: 'k'
63
+ - pattern: ([ಖ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
64
+ result: 'kh'
65
+ - pattern: ([ಗ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
66
+ result: 'g'
67
+ - pattern: ([ಘ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
68
+ result: 'gh'
69
+ - pattern: ([ಙ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
70
+ result: 'ṅ'
71
+ - pattern: ([ಚ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
72
+ result: 'ch'
73
+ - pattern: ([ಛ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
74
+ result: 'chh'
75
+ - pattern: ([ಜ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
76
+ result: 'j'
77
+ - pattern: ([ಝ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
78
+ result: 'jh'
79
+ - pattern: ([ಞ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
80
+ result: 'ñ'
81
+ - pattern: ([ಟ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
82
+ result: 'ṭ'
83
+ - pattern: ([ಠ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
84
+ result: 'ṭh'
85
+ - pattern: ([ಡ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
86
+ result: 'ḍ'
87
+ - pattern: ([ಢ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
88
+ result: 'ḍh'
89
+ - pattern: ([ಣ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
90
+ result: 'ṇ'
91
+ - pattern: ([ತ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
92
+ result: 't'
93
+ - pattern: ([ಥ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
94
+ result: 'th'
95
+ - pattern: ([ದ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
96
+ result: 'd'
97
+ - pattern: ([ಧ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
98
+ result: 'dh'
99
+ - pattern: ([ನ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
100
+ result: 'n'
101
+ - pattern: ([ಪ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
102
+ result: 'p'
103
+ - pattern: ([ಫ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
104
+ result: 'ph'
105
+ - pattern: ([ಬ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
106
+ result: 'b'
107
+ - pattern: ([ಭ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
108
+ result: 'bh'
109
+ - pattern: ([ಮ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
110
+ result: 'm'
111
+ - pattern: ([ಯ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
112
+ result: 'y'
113
+ - pattern: ([ರ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
114
+ result: 'r'
115
+ - pattern: ([ಲ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
116
+ result: 'l'
117
+ - pattern: ([ಳ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
118
+ result: 'ḷ'
119
+ - pattern: ([ವ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
120
+ result: 'v'
121
+ - pattern: ([ಶ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
122
+ result: 'sh'
123
+ - pattern: ([ಷ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
124
+ result: 'ṣh'
125
+ - pattern: ([ಸ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
126
+ result: 's'
127
+ - pattern: ([ಹ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
128
+ result: 'h'
129
+
130
+ characters:
131
+ 'ಅ': 'a'
132
+ 'ಆ': 'ā'
133
+ 'ಇ': 'i'
134
+ 'ಈ': 'ī'
135
+ 'ಉ': 'u'
136
+ 'ಊ': 'ū'
137
+ 'ಋ': 'ṛ'
138
+
139
+ 'ಎ': 'ĕ'
140
+ 'ಏ': 'e'
141
+ 'ಐ': 'ai'
142
+
143
+ 'ಒ': 'ŏ'
144
+ 'ಓ': 'o'
145
+ 'ಔ': 'au'
146
+
147
+ # Gutturals
148
+ 'ಕ': 'ka'
149
+ 'ಖ': 'kha'
150
+ 'ಗ': 'ga'
151
+ 'ಘ': 'gha'
152
+ 'ಙ': 'ṅa'
153
+
154
+ # Palatals
155
+ 'ಚ': 'cha'
156
+ 'ಛ': 'chha'
157
+ 'ಜ': 'ja'
158
+ 'ಝ': 'jha'
159
+ 'ಞ': 'ña'
160
+
161
+ # Cerebrals
162
+ 'ಟ': 'ṭa'
163
+ 'ಠ': 'ṭha'
164
+ 'ಡ': 'ḍa'
165
+ 'ಢ': 'ḍha'
166
+ 'ಣ': 'ṇa'
167
+
168
+ # Dentals
169
+ 'ತ': 'ta'
170
+ 'ಥ': 'tha'
171
+ 'ದ': 'da'
172
+ 'ಧ': 'dha'
173
+ 'ನ': 'na'
174
+
175
+ # Labials
176
+ 'ಪ': 'pa'
177
+ 'ಫ': 'pha'
178
+ 'ಬ': 'ba'
179
+ 'ಭ': 'bha'
180
+ 'ಮ': 'ma'
181
+
182
+ # Semivowels
183
+ 'ಯ': 'ya'
184
+ 'ರ': 'ra'
185
+ 'ಲ': 'la'
186
+ 'ಳ': 'ḷa'
187
+
188
+
189
+ 'ವ': 'va'
190
+
191
+ # Sibilants
192
+ 'ಶ': 'sha'
193
+ 'ಷ': 'ṣha'
194
+ 'ಸ': 'sa'
195
+
196
+
197
+ # Aspirate
198
+ 'ಹ': 'ha'
199
+
200
+
201
+ # Bisarga
202
+ 'ಃ': 'ḥ'
203
+
204
+ # Anusvāra
205
+ 'ಂ': 'ṁ'
206
+
207
+ '\u0cbc': '' #nukta
208
+
209
+ # Medials # Needed for connecting consonants
210
+ 'ಾ': "ā"
211
+ 'ಿ': "i"
212
+ 'ೀ': "ī"
213
+ 'ು': "u"
214
+ 'ೂ': "ū"
215
+ 'ೃ': "ṛ"
216
+
217
+
218
+ 'ೆ': "ĕ"
219
+ 'ೇ': "e"
220
+ 'ೈ': "ai"
221
+
222
+
223
+ 'ೊ': 'ŏ'
224
+ 'ೋ': 'o'
225
+ 'ೌ': 'au'
226
+
227
+
228
+ '्': ''
229
+ '़': ''
230
+ '್': '' # used for pronunciation without vowel
231
+ "‍": '' # no need for zero width joiner
232
+ "‌": '' # no need for zero width non-joiner
233
+
234
+
235
+
236
+ # Digits
237
+
238
+ '೦': '0'
239
+ '೧': '1'
240
+ '೨': '2'
241
+ '೩': '3'
242
+ '೪': '4'
243
+ '೫': '5'
244
+ '೬': '6'
245
+ '೭': '7'
246
+ '೮': '8'
247
+ '೯': '9'
248
+
249
+
250
+
251
+
252
+
253
+
254
+