interscript 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -0,0 +1,211 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: iso-639-2:div
5
+ source_script: Thaa
6
+ destination_script: Latn
7
+ name: ALA-Library of Congress Divehi Romanization 1997 System
8
+ alias:
9
+ ogc11122:
10
+ code: div_Thaa2Latn_ALA_1997
11
+ description: Divehi ALA-Library of Congress 1997 System
12
+ url: http://catdir.loc.gov/catdir/cpso/romanization/divehi.pdf
13
+ creation_date: 1997
14
+ description: |
15
+ ALA-Library of Congress Divehi Romanization 1997 System
16
+
17
+ notes:
18
+
19
+ - |
20
+ Romanize ށް as ḫ when it doubles the following consonant or is used as a glottal stop.
21
+ aḫvana އަށްވަނަ
22
+ maśaḫ މަށަށް
23
+ - |
24
+ When used in medial position without ް (sukūn), romanize ނ as ṁ.
25
+ aṁga އަނގަ
26
+ haṁdu ހަނދު
27
+ - |
28
+ Romanization of އ.
29
+ (a) When used in the initial position with any vowel sign, do not romanize.
30
+ ata އަތަ
31
+ idu އިދު
32
+ umuru އުމުރު
33
+ egahugi އެގަހުގި
34
+ (b) When used in the medial position with any vowel sign, romanize as ’.
35
+ ha’hūnu ހައިހޫނު
36
+ fa’isa ފައިސަ
37
+ k’īn ކްއީން
38
+ (c) When a consonant follows އް in medial position, double it in romanization.
39
+ cappalu ޗައްޕަލު
40
+ appacci އައްޕައްޗި
41
+ (d) When used in final position with ް (sukūn), romanize as h.
42
+ boh ބޮއް
43
+ biheh ބިހެއް
44
+ - |
45
+ Romanize ތް followed by another ތ as t̤ .
46
+ at̤teri އަތްތެރި
47
+ - |
48
+ Only the vowel forms that appear at the beginning of a syllable are listed.
49
+ When the vowels follow a consonant, އ is not used and the vowel signs are added to the consonant forms.
50
+ Do not distinguish between the two in romanization.
51
+ - |
52
+ ް (called sukūn) generally indicates omission of an inherent vowel associated with a consonant.
53
+ For its other uses, see Notes 1, 3, and 4.
54
+
55
+ tests:
56
+ - source: "މަށަށް"
57
+ expected: "maśaḫ"
58
+ - source: "އަނގަ"
59
+ expected: "aṁga"
60
+ - source: "ހަނދު"
61
+ expected: "haṁdu"
62
+ - source: "އަތަ"
63
+ expected: "ata"
64
+ - source: "އިދު"
65
+ expected: "idu"
66
+ - source: "އުމުރު"
67
+ expected: "umuru"
68
+ - source: "އެގަހުގި"
69
+ expected: "egahugi"
70
+ - source: "ފައިސަ"
71
+ expected: "faʼisa"
72
+ - source: "ބޮއް"
73
+ expected: "boh"
74
+ - source: "ބިހެއް"
75
+ expected: "biheh"
76
+ - source: "އަތްތެރި"
77
+ expected: "at̤teri"
78
+ - source: "ޗައްޕަލު"
79
+ expected: "cappalu"
80
+ - source: "އައްޕައްޗި"
81
+ expected: "appacci"
82
+
83
+ map:
84
+
85
+ rules:
86
+ # note[1]
87
+ - pattern: (?<=)\u0781\u07b0(?=\b)
88
+ result: "ḫ"
89
+ # note[2]
90
+ - pattern: (?<!\b)\u0782\u07b0(?<!\b) # medial position with sukun
91
+ result: "n"
92
+ - pattern: (?<!\b)\u0782(?<!\b) # medial position without sukun
93
+ result: "ṁ"
94
+ # note[3(a)]
95
+ - pattern: \b(\u0787=?)(?=[\u07a6\u07a7\u07a8\u07a9\u07aa\u07ab\u07ac\u07ad\u07ae\u07af]) # initial position with any vowel sign
96
+ result: ''
97
+ # note[3(c)] a consonant follows އް in medial position
98
+ - pattern: (?<!\b)\u0787\u07b0(?=[ހ])
99
+ result: "h"
100
+ - pattern: (?<!\b)\u0787\u07b0(?=[ށ])
101
+ result: "ś"
102
+ - pattern: (?<!\b)\u0787\u07b0(?=[ނ])
103
+ result: "n"
104
+ - pattern: (?<!\b)\u0787\u07b0(?=[ރ])
105
+ result: "r"
106
+ - pattern: (?<!\b)\u0787\u07b0(?=[ބ])
107
+ result: "b"
108
+ - pattern: (?<!\b)\u0787\u07b0(?=[ޅ])
109
+ result: "ḷ"
110
+ - pattern: (?<!\b)\u0787\u07b0(?=[ކ])
111
+ result: "k"
112
+ - pattern: (?<!\b)\u0787\u07b0(?=[ވ])
113
+ result: "v"
114
+ - pattern: (?<!\b)\u0787\u07b0(?=[މ])
115
+ result: "m"
116
+ - pattern: (?<!\b)\u0787\u07b0(?=[ފ])
117
+ result: "f"
118
+ - pattern: (?<!\b)\u0787\u07b0(?=[ދ])
119
+ result: "d"
120
+ - pattern: (?<!\b)\u0787\u07b0(?=[ތ])
121
+ result: "t"
122
+ - pattern: (?<!\b)\u0787\u07b0(?=[ލ])
123
+ result: "l"
124
+ - pattern: (?<!\b)\u0787\u07b0(?=[ގ])
125
+ result: "g"
126
+ - pattern: (?<!\b)\u0787\u07b0(?=[ޏ])
127
+ result: "ñ"
128
+ - pattern: (?<!\b)\u0787\u07b0(?=[ސ])
129
+ result: "s"
130
+ - pattern: (?<!\b)\u0787\u07b0(?=[ޑ])
131
+ result: "ḍ"
132
+ - pattern: (?<!\b)\u0787\u07b0(?=[ޖ])
133
+ result: "j"
134
+ - pattern: (?<!\b)\u0787\u07b0(?=[ޗ])
135
+ result: "c"
136
+ - pattern: (?<!\b)\u0787\u07b0(?=[ޒ])
137
+ result: "z"
138
+ - pattern: (?<!\b)\u0787\u07b0(?=[ޓ])
139
+ result: "ṭ"
140
+ - pattern: (?<!\b)\u0787\u07b0(?=[ޕ])
141
+ result: "p"
142
+ - pattern: (?<!\b)\u0787\u07b0(?=[ޔ])
143
+ result: "y"
144
+ # note[3(d)]
145
+ - pattern: (?<!\b)\u0787\u07b0(?=\b) # final position with sukun
146
+ result: 'h'
147
+ # note[3(b)]
148
+ - pattern: (?<!\b)\u0787(?=[\u07a6\u07a7\u07a8\u07a9\u07aa\u07ab\u07ac\u07ad\u07ae\u07af]) # medial position with any vowel sign
149
+ result: 'ʼ'
150
+ # note[4]
151
+ - pattern: \u078c\u07b0\u078c
152
+ result: 't̤t'
153
+
154
+
155
+ characters:
156
+
157
+ # Vowels
158
+
159
+ "\u07a6" : "a"
160
+ "\u07a7" : "ā"
161
+ "\u07a8" : "i"
162
+ "\u07a9" : "ī"
163
+ "\u07aa" : "u"
164
+ "\u07ab" : "ū"
165
+ "\u07ac" : "e"
166
+ "\u07ad" : "ē"
167
+ "\u07ae" : "o"
168
+ "\u07af" : "ō"
169
+ "\u07B0" : "" # omit (see Note 6)
170
+
171
+
172
+ # Consonants
173
+ "ހ": "h"
174
+ "ށ": "ś" # or ḫ (see Note 1)
175
+ "ނ": "n" # see Note 2
176
+ "ރ": "r"
177
+ "ބ": "b"
178
+ "ޅ": "ḷ"
179
+ "ކ": "k"
180
+ "އ": "" # ʼ or h or omit (see Note 3)
181
+ "ވ": "v"
182
+ "މ": "m"
183
+ "ފ": "f"
184
+ "ދ": "d"
185
+ "ތ": "t" # see Note 4
186
+ "ލ": "l"
187
+ "ގ": "g"
188
+ "ޏ": "ñ"
189
+ "ސ": "s"
190
+ "ޑ": "ḍ"
191
+ "ޖ": "j"
192
+ "ޗ": "c"
193
+ "ޒ": "z"
194
+ "ޓ": "ṭ"
195
+ "ޕ": "p"
196
+ "ޔ": "y"
197
+
198
+ # Divehi Equivalents to Represent Arabic Letters
199
+
200
+ "ޘ": "th"
201
+ "ޙ": "ḥ"
202
+ "ޚ": "kh"
203
+ "ޛ": "dh"
204
+ "ޝ": "sh"
205
+ "ޞ": "ṣ"
206
+ "ޟ": "ḏ"
207
+ "ޠ": "t̤"
208
+ "ޡ": "ẓ"
209
+ "ޢ": "ʻ"
210
+ "ޣ": "gh"
211
+ "ޤ": "q"
@@ -46,33 +46,123 @@ tests:
46
46
  - source: "हम"
47
47
  expected: "hama"
48
48
  - source: "मीन"
49
- expected: "maīna"
49
+ expected: "mīna"
50
50
  - source: "औसत"
51
51
  expected: "ăusata"
52
52
  - source: "माँऽऽऽ!"
53
- expected: "maān̐’’’!"
53
+ expected: "mān̐’’’!"
54
54
  - source: "माँ"
55
- expected: "maām̐"
55
+ expected: "mām̐"
56
56
  - source: "गंभीर मरीजों के मामले में भारत दूसरे नंबर पर"
57
- expected: "gaṃbhaīra maraījaoṃ kae maāmalae maeṃ bhaārata daūsarae naṃbara para"
57
+ expected: "gaṃbhīr marījoṃ ke māmale meṃ bhārat dūsare naṃbar para"
58
58
  - source: "कोरोना अपडेट्स"
59
- expected: "kaoraonaā apaḍaeṭasa"
59
+ expected: "koronā apaḍeṭsa"
60
60
  - source: "सीडीसी चीफ का बयान अहम"
61
- expected: "saīḍaīsaī caīpha kaā bayaāna ahama"
61
+ expected: "sīḍīsī cīph bayān ahama"
62
62
  - source: "गूगल प्ले स्टोर पर पेटीएम की वापसी"
63
- expected: "gaūgala palae saṭaora para paeṭaīema kaī vaāpasaī"
63
+ expected: "gūgal ple sṭor par peṭīem vāpasī"
64
64
  - source: "भारत में गैंबलिंग की इजाजत नहीं"
65
- expected: "bhaārata maeṃ gaaiṃbalaiṅga kaī ijaājata nahaīṃ"
65
+ expected: "bhārat meṃ gaiṃbaliṃg ijājat nahīṃ"
66
66
  - source: "कोरोना वैक्सीन मुद्दे पर घिरे राष्ट्रपति; जो बाइडेन बोले- मुझे और देश को वैज्ञानिकों पर भरोसा है, डोनाल्ड ट्रम्प पर नहीं"
67
- expected: "kaoraonaā vaaikasaīna maudadae para ghairae raāshaṭarapatai; jao baāiḍaena baolae- maujhae ăura daeśa kao vaaijañaānaikaoṃ para bharaosaā haai, ḍaonaālaḍa ṭaramapa para nahaīṃ"
67
+ expected: "koronā vaiksīn mudde par ghire rāshṭrapati; jo bāiḍen bole- mujhe ăur deś ko vaijñānikoṃ par bharosā hai, ḍonālḍ ṭramp par nahīṃ"
68
68
  - source: "गूगल की कार्रवाई पर पेटीएम ने कहा था कि ऐप को अस्थायी तौर पर प्ले-स्टोर से हटाया गया है, आपके पैसे सुरक्षित हैं"
69
- expected: "gaūgala kaī kaāraravaāī para paeṭaīema nae kahaā thaā kai aipa kao asathaāyaī taăura para palae-saṭaora sae haṭaāyaā gayaā haai, āpakae paaisae saurakashaita haaiṃ"
69
+ expected: "gūgal kārravāī par peṭīem ne kahā thā ki aip ko asthāyī tăur par ple-sṭor se haṭāyā gayā hai, āpake paise surakshit haiṃ"
70
70
  - source: "२५६८७५४४६४४६१६११"
71
71
  expected: "2568754464461611"
72
72
 
73
73
  map:
74
74
 
75
75
  rules:
76
+ # note[2]
77
+ - pattern: (क=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
78
+ result: 'k'
79
+ - pattern: (क़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
80
+ result: 'q'
81
+ - pattern: (ख=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
82
+ result: 'kh'
83
+ - pattern: (ख़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
84
+ result: 'kh'
85
+ - pattern: (ग=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
86
+ result: 'g'
87
+ - pattern: (ग़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
88
+ result: 'gh'
89
+ - pattern: (घ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
90
+ result: 'gh'
91
+ - pattern: (ङ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
92
+ result: 'ṅ'
93
+ - pattern: (च=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
94
+ result: 'c'
95
+ - pattern: (छ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
96
+ result: 'ch'
97
+ - pattern: (ज=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
98
+ result: 'j'
99
+ - pattern: (ज़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
100
+ result: 'j'
101
+ - pattern: (झ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
102
+ result: 'jh'
103
+ - pattern: (ञ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
104
+ result: 'ñ'
105
+ - pattern: (ट=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
106
+ result: 'ṭ'
107
+ - pattern: (ट़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
108
+ result: 't̤'
109
+ - pattern: (ठ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
110
+ result: 'ṭh'
111
+ - pattern: (ड=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
112
+ result: 'ḍ'
113
+ - pattern: (ड़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
114
+ result: 'ṛ'
115
+ - pattern: (ड़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
116
+ result: 'ṛ'
117
+ - pattern: (ढ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
118
+ result: 'ḍh'
119
+ - pattern: (ढ़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
120
+ result: 'ṛh'
121
+ - pattern: (ण=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
122
+ result: 'ṇ'
123
+ - pattern: (त=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
124
+ result: 't'
125
+ - pattern: (थ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
126
+ result: 'th'
127
+ - pattern: (द=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
128
+ result: 'd'
129
+ - pattern: (ध=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
130
+ result: 'dh'
131
+ - pattern: (न=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
132
+ result: 'n'
133
+ - pattern: (प=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
134
+ result: 'p'
135
+ - pattern: (फ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
136
+ result: 'ph'
137
+ - pattern: (फ़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
138
+ result: 'ph'
139
+ - pattern: (ब=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
140
+ result: 'b'
141
+ - pattern: (भ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
142
+ result: 'bh'
143
+ - pattern: (म=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
144
+ result: 'm'
145
+ - pattern: (य=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
146
+ result: 'y'
147
+ - pattern: (र=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
148
+ result: 'r'
149
+ - pattern: (ल=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
150
+ result: 'l'
151
+ - pattern: (व=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
152
+ result: 'v'
153
+ - pattern: (श=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
154
+ result: 'ś'
155
+ - pattern: (ष=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
156
+ result: 'sh'
157
+ - pattern: (स=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
158
+ result: 's'
159
+ - pattern: (स़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
160
+ result: 's̤'
161
+ - pattern: (ह=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
162
+ result: 'h'
163
+ - pattern: (ह़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
164
+ result: 'h'
165
+
76
166
  # note[3]
77
167
  - pattern: \u0902(?=[कक़खख़गग़घङ])
78
168
  result: ṅ
@@ -134,6 +224,7 @@ map:
134
224
  'ठ': 'ṭha'
135
225
  'ड': 'ḍa'
136
226
  'ड़': 'ṛa'
227
+ 'ड़': 'ṛa'
137
228
  'ढ': 'ḍha'
138
229
  'ढ़': 'ṛha'
139
230
  'ण': 'ṇa'
@@ -196,6 +287,7 @@ map:
196
287
  'ै': "ai"
197
288
  'ो': "o"
198
289
  '्': ""
290
+ '़': ""
199
291
 
200
292
  # digits
201
293
 
@@ -40,7 +40,25 @@ notes:
40
40
 
41
41
  tests:
42
42
  - source: "इस चुनौतीपूर्ण समय में 'वर्क फ्रॉम होम’ सामान्य बन चुका है"
43
- expected: "isa caunaăutaīpaūraṇa samaya maeṃ 'varaka pharaôma haomasaāmaānaya bana caukaā haai"
43
+ expected: "is cunăutīpūrṇ samay meṃ 'vark phrôm homasāmāny ban cukā hai"
44
+ - source: "दिल्ली में त्योहार पर खरीददारी करने निकले बड़ी संख्या में लोग, कई जगहों पर लगा भीषण जाम"
45
+ expected: "dillī meṃ tyohār par kharīdadārī karane nikale baṛī saṃkhyā meṃ loga, kaī jagahoṃ par lagā bhīshaṇ jāma"
46
+ - source: "सरकार ने पेंशन भोगियों को लाइफ सर्टिफिकेट जमा कराने के मामले में दी बड़ी राहत"
47
+ expected: "sarakār ne peṃśan bhogiyoṃ ko lāiph sarṭiphikeṭ jamā karāne ke māmale meṃ dī baṛī rāhata"
48
+ - source: "कांग्रेस ने माना उसके लचर प्रदर्शन ने डुबोई महागठबंधन की लुटिया, पार्टी में उठने लगी आत्ममंथन की आवाज"
49
+ expected: "kāṃgres ne mānā usake lacar pradarśan ne ḍuboī mahāgaṭhabandhan kī luṭiyā, pārṭī meṃ uṭhane lagī ātmamanthan kī āvāja"
50
+ - source: "डिजिटल पेमेंट सिस्टम ने छोटे-मध्यम कारोबारों का दिया साथ, कोरोना की परेशानियों को किया कम"
51
+ expected: "ḍijiṭal pemeṃṭ sisṭam ne choṭe-madhyam kārobāroṃ kā diyā sātha, koronā kī pareśāniyoṃ ko kiyā kama"
52
+ - source: "छोटे व्यापारियों को ढूंढें, उनसे खरीदें और उनका साथ दें"
53
+ expected: "choṭe vyāpāriyoṃ ko ḍhūṃḍheṃ, unase kharīdeṃ ăur unakā sāth deṃ"
54
+ - source: "भारत के साथ साझीदारी को महत्व देंगे बाइडन, ओबामा प्रशासन में रहीं वरिष्ठ अधिकारी एलिसा ने जताई उम्मीद"
55
+ expected: "bhārat ke sāth sājhīdārī ko mahatv deṃge bāiḍana, obāmā praśāsan meṃ rahīṃ varishṭh adhikārī elisā ne jatāī ummīda"
56
+ - source: "दो महीने से कोमा में था युवक, चिकन की चर्चा सुनते ही आया होश"
57
+ expected: "do mahīne se komā meṃ thā yuvaka, cikan kī carcā sunate hī āyā hośa"
58
+ - source: "कोरोना के टीके पर खुशखबरी, भारत पहुंची रूसी वैक्सीन की पहली खेप"
59
+ expected: "koronā ke ṭīke par khuśakhabarī, bhārat pahuṃcī rūsī vaiksīn kī pahalī khepa"
60
+ - source: "दिल्ली के गांधी नगर स्थित एक दुकान में लगी भीषण आग, दमकल की 20 गाड़ियां मौके पर"
61
+ expected: "dillī ke gāṃdhī nagar sthit ek dukān meṃ lagī bhīshaṇ āga, damakal kī 20 gāṛiyāṃ măuke para"
44
62
 
45
63
  map:
46
64
 
@@ -0,0 +1,274 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: iso-639-2:kan
5
+ source_script: Kana
6
+ destination_script: Latn
7
+ name: Kannada Romanization, 1997
8
+ url: http://catdir.loc.gov/catdir/cpso/romanization/kannada.pdf
9
+ creation_date: 1997
10
+ description: |
11
+ ALA-LC Romanization table for Kannada
12
+
13
+ notes:
14
+
15
+ - Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
16
+ vowels following a consonant can be found in grammars; no distinction between the two is
17
+ made in transliteration.
18
+
19
+ - |
20
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
21
+ transliteration, with the following exceptions:
22
+
23
+ a) when another vowel is indicated by its appropriate sign; and
24
+ b) when the absence of any vowel is indicated by the superscript sign (◌್).
25
+
26
+ - |
27
+ Exception: Anusvāra is transliterated by:
28
+
29
+ a) ṅ before gutturals,
30
+ b) ñ before palatals,
31
+ c) ṇ before cerebrals,
32
+ d) n before dentals, and
33
+ e) m before labials.
34
+
35
+
36
+ tests:
37
+ - source: "ಕರ್ಣಾಟಕ"
38
+ expected: "karṇāṭaka"
39
+ - source: "ಬೆಂಗಳೂರು"
40
+ expected: "beṅgaḷūru"
41
+ - source: "ಉಡುಪಿಯಲ್ಲಿ ಪ್ರಪ್ರಥಮ ಬಾರಿಗೆ ಪ್ರಾರಂಭವಾಗಿರುವ ದೇಶಿ ಉತ್ಪನ್ನಗಳ ಮಳಿಗೆ"
42
+ expected: "uḍupiyalli praprathama bārige prāraṃbhavāgiruva dēśi utpannagaḷa maḷige"
43
+ - source: "ದೇವರ ಹೆಸರು ಬಳಸಿ ಆನ್‌ಲೈನ್‌ ಬೆಟ್ಟಿಂಗ್‌!"
44
+ expected: "dēvara hesaru baḷasi ānlain beṭṭiṃg!"
45
+ - source: "ಚಿಕ್ಕಮಗಳೂರು : ಪುಷ್ಪ ಸಮರ್ಪಣೆ ವೇಳೆ ಮಗಳನ್ನ ನೆನೆದು ಕಣ್ಣೀರಿಟ್ಟ ಮೃತ ಪೇದೆ ತಾಯಿ"
46
+ expected: "cikkamagaḷūru : puṣpa samarpaṇe vēḷe magaḷanna nenedu kaṇṇīriṭṭa mṛta pēde tāyi"
47
+ - source: "ಸ್ವಾಮಿತ್ವ: ಹೊಸ ಯೋಜನೆಯಿಂದ ನಮಗೆ ಏನು ಲಾಭ ?"
48
+ expected: "svāmitva: hosa yōjaneyinda namage ēnu lābha ?"
49
+ - source: "ಮರಳು ಸಾಗಾಣಿಕೆ ವ್ಯವಹಾರ ಆಗಬಾರದು :ಅಧಿಕಾರಿಗಳಿಗೆ ಖಡಕ್ ಸೂಚನೆ ನೀಡಿದ ಜಿಲ್ಲಾಧಿಕಾರಿ"
50
+ expected: "maraḷu sāgāṇike vyavahāra āgabāradu :adhikārigaḷige khaḍak sūcane nīḍida jillādhikāri"
51
+ - source: "ಹಾವೇರಿ ಜಿಲ್ಲೆಯಲ್ಲಿ ೯೭ ಜನರಲ್ಲಿ ಕೋವಿಡ್ ಸೋಂಕು ಪತ್ತೆ ; 54 ಮಂದಿ ಗುಣಮುಖ"
52
+ expected: "hāvēri jilleyalli 97 janaralli kōviḍ sōṃku patte ; 54 maṃdi guṇamukha"
53
+ - source: "ಸಿಂದಗಿ ಐಸಿಐಸಿಐ ಬ್ಯಾಂಕ್ ಸೆಕ್ಯುರಿಟಿ ಗಾರ್ಡ್ ಹತ್ಯೆ ಪ್ರಕರಣ ಭೇದಿಸಿದ ಪೊಲೀಸರು"
54
+ expected: "sindagi aisiaisiai byāṃk sekyuriṭi gārḍ hatye prakaraṇa bhēdisida polīsaru"
55
+ - source: "ಬ್ಯಾಂಕರ್‌ಗಳೊಂದಿಗೆ ಡಿವಿ ಸಭೆ : ಆಧ್ಯತಾ ವಲಯ, ಸಾಲ ಯೋಜನೆ ತ್ವರಿತ ಮಂಜೂರಿಗೆ ಸೂಚನೆ"
56
+ expected: "byāṅkargaḷoṃdige ḍivi sabhe : ādhyatā valaya, sāla yōjane tvarita maṃjūrige sūcane"
57
+ - source: "ಪೊಲೀಸ್‌ ಇಲಾಖೆ ಸಮಗ್ರ ಅಭಿವೃದ್ಧಿ; ಡಿಜಿಪಿ ನೇತೃತ್ವದಲ್ಲಿ ಸಮಿತಿ ರಚನೆ: ಬೊಮ್ಮಾಯಿ"
58
+ expected: "polīs ilākhe samagra abhivṛddhi; ḍijipi nētṛtvadalli samiti racane: beūmmāyi"
59
+ - source: "ಕೆಟ್ಟಿರುವ ರಸ್ತೆಗಳ ದುರಸ್ತಿಗೆ ಸರಕಾರದ ಯೋಜನೆ"
60
+ expected: "keṭṭiruva rastegaḷa durastige sarakārada yōjane"
61
+
62
+ map:
63
+
64
+
65
+ rules:
66
+ # rule[II]: consonants followed by a vowel sign or virama are emitted without the inherent "a"
67
+ - pattern: ([ಕ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
68
+ result: 'k'
69
+ - pattern: ([ಖ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
70
+ result: 'kh'
71
+ - pattern: ([ಗ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
72
+ result: 'g'
73
+ - pattern: ([ಘ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
74
+ result: 'gh'
75
+ - pattern: ([ಙ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
76
+ result: 'ṅ'
77
+ - pattern: ([ಚ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
78
+ result: 'c'
79
+ - pattern: ([ಛ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
80
+ result: 'ch'
81
+ - pattern: ([ಜ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
82
+ result: 'j'
83
+ - pattern: ([ಝ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
84
+ result: 'jh'
85
+ - pattern: ([ಞ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
86
+ result: 'ñ'
87
+ - pattern: ([ಟ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
88
+ result: 'ṭ'
89
+ - pattern: ([ಠ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
90
+ result: 'ṭh'
91
+ - pattern: ([ಡ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
92
+ result: 'ḍ'
93
+ - pattern: ([ಢ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
94
+ result: 'ḍh'
95
+ - pattern: ([ಣ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
96
+ result: 'ṇ'
97
+ - pattern: ([ತ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
98
+ result: 't'
99
+ - pattern: ([ಥ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
100
+ result: 'th'
101
+ - pattern: ([ದ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
102
+ result: 'd'
103
+ - pattern: ([ಧ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
104
+ result: 'dh'
105
+ - pattern: ([ನ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
106
+ result: 'n'
107
+ - pattern: ([ಪ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
108
+ result: 'p'
109
+ - pattern: ([ಫ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
110
+ result: 'ph'
111
+ - pattern: ([ಬ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
112
+ result: 'b'
113
+ - pattern: ([ಭ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
114
+ result: 'bh'
115
+ - pattern: ([ಮ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
116
+ result: 'm'
117
+ - pattern: ([ಯ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
118
+ result: 'y'
119
+ - pattern: ([ರ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
120
+ result: 'r'
121
+ - pattern: ([ಱ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
122
+ result: 'ṟ'
123
+ - pattern: ([ಲ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
124
+ result: 'l'
125
+ - pattern: ([ಳ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
126
+ result: 'ḷ'
127
+ - pattern: ([ೞ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
128
+ result: 'l̤'
129
+ - pattern: ([ವ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
130
+ result: 'v'
131
+ - pattern: ([ಶ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
132
+ result: 'ś'
133
+ - pattern: ([ಷ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
134
+ result: 'ṣ'
135
+ - pattern: ([ಸ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
136
+ result: 's'
137
+ - pattern: ([ಹ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
138
+ result: 'h'
139
+
140
+ # rule[III]
141
+ - pattern: \u0C82(?=[ಕಖಗಘಙ])
142
+ result: ṅ
143
+ - pattern: \u0C82(?=[ಚಛಜಝಞ])
144
+ result: ñ
145
+ - pattern: \u0C82(?=[ಟಠಡಢಣ])
146
+ result: ṇ
147
+ - pattern: \u0C82(?=[ತಥದಧನ])
148
+ result: n
149
+
150
+ characters:
151
+ 'ಅ': 'a'
152
+ 'ಆ': 'ā'
153
+ 'ಇ': 'i'
154
+ 'ಈ': 'ī'
155
+ 'ಉ': 'u'
156
+ 'ಊ': 'ū'
157
+ 'ಋ': 'ṛ'
158
+ 'ೠ': 'ṝ'
159
+
160
+ 'ಌ': 'ḻ'
161
+
162
+ 'ಎ': 'e'
163
+ 'ಏ': 'ē'
164
+ 'ಐ': 'ai'
165
+
166
+ 'ಒ': 'o'
167
+ 'ಓ': 'ō'
168
+ 'ಔ': 'au'
169
+
170
+ # Gutturals
171
+ 'ಕ': 'ka'
172
+ 'ಖ': 'kha'
173
+ 'ಗ': 'ga'
174
+ 'ಘ': 'gha'
175
+ 'ಙ': 'ṅa'
176
+
177
+ # Palatals
178
+ 'ಚ': 'ca'
179
+ 'ಛ': 'cha'
180
+ 'ಜ': 'ja'
181
+ 'ಝ': 'jha'
182
+ 'ಞ': 'ña'
183
+
184
+ # Cerebrals
185
+ 'ಟ': 'ṭa'
186
+ 'ಠ': 'ṭha'
187
+ 'ಡ': 'ḍa'
188
+ 'ಢ': 'ḍha'
189
+ 'ಣ': 'ṇa'
190
+
191
+ # Dentals
192
+ 'ತ': 'ta'
193
+ 'ಥ': 'tha'
194
+ 'ದ': 'da'
195
+ 'ಧ': 'dha'
196
+ 'ನ': 'na'
197
+
198
+ # Labials
199
+ 'ಪ': 'pa'
200
+ 'ಫ': 'pha'
201
+ 'ಬ': 'ba'
202
+ 'ಭ': 'bha'
203
+ 'ಮ': 'ma'
204
+
205
+ # Semivowels
206
+ 'ಯ': 'ya'
207
+ 'ರ': 'ra'
208
+ 'ಱ': 'ṟa'
209
+ 'ಲ': 'la'
210
+ 'ಳ': 'ḷa'
211
+ 'ೞ': 'l̤a'
212
+
213
+
214
+ 'ವ': 'va'
215
+
216
+ # Sibilants
217
+ 'ಶ': 'śa'
218
+ 'ಷ': 'ṣa'
219
+ 'ಸ': 'sa'
220
+
221
+
222
+ # Aspirate
223
+ 'ಹ': 'ha'
224
+
225
+
226
+ # Bisarga
227
+ 'ಃ': 'ḥ'
228
+
229
+ # Anusvāra
230
+ 'ಂ': 'ṃ'
231
+
232
+ '\u0cbc': '' #nukta
233
+
234
+ # Medials (vowel signs), needed for vowels attached to consonants
235
+ 'ಾ': "ā"
236
+ 'ಿ': "i"
237
+ 'ೀ': "ī"
238
+ 'ು': "u"
239
+ 'ೂ': "ū"
240
+ 'ೃ': "ṛ"
241
+ 'ೄ': "ṝ"
242
+ '\u0CE2': 'ḻ' # KANNADA VOWEL SIGN VOCALIC L ( ೢ)
243
+
244
+
245
+ 'ೆ': "e"
246
+ 'ೇ': "ē"
247
+ 'ೈ': "ai"
248
+
249
+
250
+ 'ೊ': 'o'
251
+ 'ೋ': 'ō'
252
+ 'ೌ': 'au'
253
+
254
+
255
+ '्': ''
256
+ '़': ''
257
+ '್': '' # used for pronunciation without vowel
258
+ "‍": '' # no need for zero with joiner
259
+ "‌": '' # no need for zero with non joiner
260
+
261
+
262
+
263
+ # Digits
264
+
265
+ '೦': '0'
266
+ '೧': '1'
267
+ '೨': '2'
268
+ '೩': '3'
269
+ '೪': '4'
270
+ '೫': '5'
271
+ '೬': '6'
272
+ '೭': '7'
273
+ '೮': '8'
274
+ '೯': '9'