interscript 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +1 -3
- data/aliases.json +1 -0
- data/lib/interscript.rb +8 -3
- data/lib/interscript/fs.rb +27 -0
- data/lib/interscript/mapping.rb +3 -1
- data/lib/interscript/opal.rb +142 -3
- data/lib/interscript/opal/entrypoint.rb +8 -0
- data/lib/interscript/opal/exports.rb +11 -0
- data/lib/interscript/opal/maps.js.erb +2 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
- data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
- data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
- data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
- data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
- data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
- data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
- data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
- data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
- data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
- data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
- data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
- data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
- data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
- data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
- data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
- data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
- data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
- data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
- data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
- data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
- data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
- data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
- data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
- data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
- data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
- data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
- data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
- data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
- data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
- data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
- data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
- data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
- data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
- data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
- data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
- data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
- data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
- data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
- data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
- data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
- data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
- data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
- data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
- data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
- data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
- data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
- data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
- data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
- data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
- data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
- data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
- data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
- data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
- data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
- data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
- data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
- data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
- data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
- data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
- data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
- data/spec/interscript/filenames_spec.rb +6 -369
- data/spec/interscript_spec.rb +10 -2
- metadata +50 -7
- data/lib/interscript/opal/map_translate.rb +0 -7
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: alalc
|
|
3
|
+
id: 1997
|
|
4
|
+
language: iso-639-2:div
|
|
5
|
+
source_script: Thaa
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: ALA-Library of Congress Divehi Romanization 1997 System
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: div_Thaa2Latn_ALA_1997
|
|
11
|
+
description: Divehi ALA-Library of Congress 1997 System
|
|
12
|
+
url: http://catdir.loc.gov/catdir/cpso/romanization/divehi.pdf
|
|
13
|
+
creation_date: 1997
|
|
14
|
+
description: |
|
|
15
|
+
ALA-Library of Congress Divehi Romanization 1997 System
|
|
16
|
+
|
|
17
|
+
notes:
|
|
18
|
+
|
|
19
|
+
- |
|
|
20
|
+
Romanize ށް as ḫ when it doubles the following consonant or is used as a glottal stop.
|
|
21
|
+
aḫvana އަށްވަނަ
|
|
22
|
+
maśaḫ މަށަށް
|
|
23
|
+
- |
|
|
24
|
+
When used in medial position without ް (sukūn), romanize ނ as ṁ.
|
|
25
|
+
aṁga އަނގަ
|
|
26
|
+
haṁdu ހަނދު
|
|
27
|
+
- |
|
|
28
|
+
Romanization of އ.
|
|
29
|
+
(a) When used in the initial position with any vowel sign, do not romanize.
|
|
30
|
+
ata އަތަ
|
|
31
|
+
idu އިދު
|
|
32
|
+
umuru އުމުރު
|
|
33
|
+
egahugi އެގަހުގި
|
|
34
|
+
(b) When used in the medial position with any vowel sign, romanize as ’.
|
|
35
|
+
ha’ihūnu ހައިހޫނު
|
|
36
|
+
fa’isa ފައިސަ
|
|
37
|
+
k’īn ކްއީން
|
|
38
|
+
(c) When a consonant follows އް in medial position, double it in romanization.
|
|
39
|
+
cappalu ޗައްޕަލު
|
|
40
|
+
appacci އައްޕައްޗި
|
|
41
|
+
(d) When used in final position with ް (sukūn), romanize as h.
|
|
42
|
+
boh ބޮއް
|
|
43
|
+
biheh ބިހެއް
|
|
44
|
+
- |
|
|
45
|
+
Romanize ތް followed by another ތ as t̤.
|
|
46
|
+
at̤teri އަތްތެރި
|
|
47
|
+
- |
|
|
48
|
+
Only the vowel forms that appear at the beginning of a syllable are listed.
|
|
49
|
+
When the vowels follow a consonant, އ is not used and the vowel signs are added to the consonant forms.
|
|
50
|
+
Do not distinguish between the two in romanization.
|
|
51
|
+
- |
|
|
52
|
+
ް (called sukūn) generally indicates omission of an inherent vowel associated with a consonant.
|
|
53
|
+
For its other uses, see Notes 1, 3, and 4.
|
|
54
|
+
|
|
55
|
+
tests:
|
|
56
|
+
- source: "މަށަށް"
|
|
57
|
+
expected: "maśaḫ"
|
|
58
|
+
- source: "އަނގަ"
|
|
59
|
+
expected: "aṁga"
|
|
60
|
+
- source: "ހަނދު"
|
|
61
|
+
expected: "haṁdu"
|
|
62
|
+
- source: "އަތަ"
|
|
63
|
+
expected: "ata"
|
|
64
|
+
- source: "އިދު"
|
|
65
|
+
expected: "idu"
|
|
66
|
+
- source: "އުމުރު"
|
|
67
|
+
expected: "umuru"
|
|
68
|
+
- source: "އެގަހުގި"
|
|
69
|
+
expected: "egahugi"
|
|
70
|
+
- source: "ފައިސަ"
|
|
71
|
+
expected: "faʼisa"
|
|
72
|
+
- source: "ބޮއް"
|
|
73
|
+
expected: "boh"
|
|
74
|
+
- source: "ބިހެއް"
|
|
75
|
+
expected: "biheh"
|
|
76
|
+
- source: "އަތްތެރި"
|
|
77
|
+
expected: "at̤teri"
|
|
78
|
+
- source: "ޗައްޕަލު"
|
|
79
|
+
expected: "cappalu"
|
|
80
|
+
- source: "އައްޕައްޗި"
|
|
81
|
+
expected: "appacci"
|
|
82
|
+
|
|
83
|
+
map:
|
|
84
|
+
|
|
85
|
+
rules:
|
|
86
|
+
# note[1]
|
|
87
|
+
- pattern: (?<=)\u0781\u07b0(?=\b)
|
|
88
|
+
result: "ḫ"
|
|
89
|
+
# note[2]
|
|
90
|
+
- pattern: (?<!\b)\u0782\u07b0(?<!\b) # medial position with sukun
|
|
91
|
+
result: "n"
|
|
92
|
+
- pattern: (?<!\b)\u0782(?<!\b) # medial position without sukun
|
|
93
|
+
result: "ṁ"
|
|
94
|
+
# note[3(a)]
|
|
95
|
+
- pattern: \b(\u0787=?)(?=[\u07a6\u07a7\u07a8\u07a9\u07aa\u07ab\u07ac\u07ad\u07ae\u07af]) # initial position with any vowel sign
|
|
96
|
+
result: ''
|
|
97
|
+
# note[3(c)] a consonant follows އް in medial position
|
|
98
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ހ])
|
|
99
|
+
result: "h"
|
|
100
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ށ])
|
|
101
|
+
result: "ś"
|
|
102
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ނ])
|
|
103
|
+
result: "n"
|
|
104
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ރ])
|
|
105
|
+
result: "r"
|
|
106
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ބ])
|
|
107
|
+
result: "b"
|
|
108
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ޅ])
|
|
109
|
+
result: "ḷ"
|
|
110
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ކ])
|
|
111
|
+
result: "k"
|
|
112
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ވ])
|
|
113
|
+
result: "v"
|
|
114
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[މ])
|
|
115
|
+
result: "m"
|
|
116
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ފ])
|
|
117
|
+
result: "f"
|
|
118
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ދ])
|
|
119
|
+
result: "d"
|
|
120
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ތ])
|
|
121
|
+
result: "t"
|
|
122
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ލ])
|
|
123
|
+
result: "l"
|
|
124
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ގ])
|
|
125
|
+
result: "g"
|
|
126
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ޏ])
|
|
127
|
+
result: "ñ"
|
|
128
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ސ])
|
|
129
|
+
result: "s"
|
|
130
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ޑ])
|
|
131
|
+
result: "ḍ"
|
|
132
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ޖ])
|
|
133
|
+
result: "j"
|
|
134
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ޗ])
|
|
135
|
+
result: "c"
|
|
136
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ޒ])
|
|
137
|
+
result: "z"
|
|
138
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ޓ])
|
|
139
|
+
result: "ṭ"
|
|
140
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ޕ])
|
|
141
|
+
result: "p"
|
|
142
|
+
- pattern: (?<!\b)\u0787\u07b0(?=[ޔ])
|
|
143
|
+
result: "y"
|
|
144
|
+
# note[3(d)]
|
|
145
|
+
- pattern: (?<!\b)\u0787\u07b0(?=\b) # final position with sukun
|
|
146
|
+
result: 'h'
|
|
147
|
+
# note[3(b)]
|
|
148
|
+
- pattern: (?<!\b)\u0787(?=[\u07a6\u07a7\u07a8\u07a9\u07aa\u07ab\u07ac\u07ad\u07ae\u07af]) # medial position with any vowel sign
|
|
149
|
+
result: 'ʼ'
|
|
150
|
+
# note[4]
|
|
151
|
+
- pattern: \u078c\u07b0\u078c
|
|
152
|
+
result: 't̤t'
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
characters:
|
|
156
|
+
|
|
157
|
+
# Vowels
|
|
158
|
+
|
|
159
|
+
"\u07a6" : "a"
|
|
160
|
+
"\u07a7" : "ā"
|
|
161
|
+
"\u07a8" : "i"
|
|
162
|
+
"\u07a9" : "ī"
|
|
163
|
+
"\u07aa" : "u"
|
|
164
|
+
"\u07ab" : "ū"
|
|
165
|
+
"\u07ac" : "e"
|
|
166
|
+
"\u07ad" : "ē"
|
|
167
|
+
"\u07ae" : "o"
|
|
168
|
+
"\u07af" : "ō"
|
|
169
|
+
"\u07B0" : "" # omit (see Note 6)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# Consonants
|
|
173
|
+
"ހ": "h"
|
|
174
|
+
"ށ": "ś" # or ḫ (see Note 1)
|
|
175
|
+
"ނ": "n" # see Note 2
|
|
176
|
+
"ރ": "r"
|
|
177
|
+
"ބ": "b"
|
|
178
|
+
"ޅ": "ḷ"
|
|
179
|
+
"ކ": "k"
|
|
180
|
+
"އ": "" # ʼ or h or omit (see Note 3)
|
|
181
|
+
"ވ": "v"
|
|
182
|
+
"މ": "m"
|
|
183
|
+
"ފ": "f"
|
|
184
|
+
"ދ": "d"
|
|
185
|
+
"ތ": "t" # see Note 4
|
|
186
|
+
"ލ": "l"
|
|
187
|
+
"ގ": "g"
|
|
188
|
+
"ޏ": "ñ"
|
|
189
|
+
"ސ": "s"
|
|
190
|
+
"ޑ": "ḍ"
|
|
191
|
+
"ޖ": "j"
|
|
192
|
+
"ޗ": "c"
|
|
193
|
+
"ޒ": "z"
|
|
194
|
+
"ޓ": "ṭ"
|
|
195
|
+
"ޕ": "p"
|
|
196
|
+
"ޔ": "y"
|
|
197
|
+
|
|
198
|
+
# Divehi Equivalents to Represent Arabic Letters
|
|
199
|
+
|
|
200
|
+
"ޘ": "th"
|
|
201
|
+
"ޙ": "ḥ"
|
|
202
|
+
"ޚ": "kh"
|
|
203
|
+
"ޛ": "dh"
|
|
204
|
+
"ޝ": "sh"
|
|
205
|
+
"ޞ": "ṣ"
|
|
206
|
+
"ޟ": "ḏ"
|
|
207
|
+
"ޠ": "t̤"
|
|
208
|
+
"ޡ": "ẓ"
|
|
209
|
+
"ޢ": "ʻ"
|
|
210
|
+
"ޣ": "gh"
|
|
211
|
+
"ޤ": "q"
|
|
@@ -46,33 +46,123 @@ tests:
|
|
|
46
46
|
- source: "हम"
|
|
47
47
|
expected: "hama"
|
|
48
48
|
- source: "मीन"
|
|
49
|
-
expected: "
|
|
49
|
+
expected: "mīna"
|
|
50
50
|
- source: "औसत"
|
|
51
51
|
expected: "ăusata"
|
|
52
52
|
- source: "माँऽऽऽ!"
|
|
53
|
-
expected: "
|
|
53
|
+
expected: "mān̐’’’!"
|
|
54
54
|
- source: "माँ"
|
|
55
|
-
expected: "
|
|
55
|
+
expected: "mām̐"
|
|
56
56
|
- source: "गंभीर मरीजों के मामले में भारत दूसरे नंबर पर"
|
|
57
|
-
expected: "
|
|
57
|
+
expected: "gaṃbhīr marījoṃ ke māmale meṃ bhārat dūsare naṃbar para"
|
|
58
58
|
- source: "कोरोना अपडेट्स"
|
|
59
|
-
expected: "
|
|
59
|
+
expected: "koronā apaḍeṭsa"
|
|
60
60
|
- source: "सीडीसी चीफ का बयान अहम"
|
|
61
|
-
expected: "
|
|
61
|
+
expected: "sīḍīsī cīph kā bayān ahama"
|
|
62
62
|
- source: "गूगल प्ले स्टोर पर पेटीएम की वापसी"
|
|
63
|
-
expected: "
|
|
63
|
+
expected: "gūgal ple sṭor par peṭīem kī vāpasī"
|
|
64
64
|
- source: "भारत में गैंबलिंग की इजाजत नहीं"
|
|
65
|
-
expected: "
|
|
65
|
+
expected: "bhārat meṃ gaiṃbaliṃg kī ijājat nahīṃ"
|
|
66
66
|
- source: "कोरोना वैक्सीन मुद्दे पर घिरे राष्ट्रपति; जो बाइडेन बोले- मुझे और देश को वैज्ञानिकों पर भरोसा है, डोनाल्ड ट्रम्प पर नहीं"
|
|
67
|
-
expected: "
|
|
67
|
+
expected: "koronā vaiksīn mudde par ghire rāshṭrapati; jo bāiḍen bole- mujhe ăur deś ko vaijñānikoṃ par bharosā hai, ḍonālḍ ṭramp par nahīṃ"
|
|
68
68
|
- source: "गूगल की कार्रवाई पर पेटीएम ने कहा था कि ऐप को अस्थायी तौर पर प्ले-स्टोर से हटाया गया है, आपके पैसे सुरक्षित हैं"
|
|
69
|
-
expected: "
|
|
69
|
+
expected: "gūgal kī kārravāī par peṭīem ne kahā thā ki aip ko asthāyī tăur par ple-sṭor se haṭāyā gayā hai, āpake paise surakshit haiṃ"
|
|
70
70
|
- source: "२५६८७५४४६४४६१६११"
|
|
71
71
|
expected: "2568754464461611"
|
|
72
72
|
|
|
73
73
|
map:
|
|
74
74
|
|
|
75
75
|
rules:
|
|
76
|
+
# note[2]
|
|
77
|
+
- pattern: (क=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
78
|
+
result: 'k'
|
|
79
|
+
- pattern: (क़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
80
|
+
result: 'q'
|
|
81
|
+
- pattern: (ख=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
82
|
+
result: 'kh'
|
|
83
|
+
- pattern: (ख़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
84
|
+
result: 'kh'
|
|
85
|
+
- pattern: (ग=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
86
|
+
result: 'g'
|
|
87
|
+
- pattern: (ग़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
88
|
+
result: 'gh'
|
|
89
|
+
- pattern: (घ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
90
|
+
result: 'gh'
|
|
91
|
+
- pattern: (ङ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
92
|
+
result: 'ṅ'
|
|
93
|
+
- pattern: (च=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
94
|
+
result: 'c'
|
|
95
|
+
- pattern: (छ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
96
|
+
result: 'ch'
|
|
97
|
+
- pattern: (ज=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
98
|
+
result: 'j'
|
|
99
|
+
- pattern: (ज़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
100
|
+
result: 'j'
|
|
101
|
+
- pattern: (झ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
102
|
+
result: 'jh'
|
|
103
|
+
- pattern: (ञ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
104
|
+
result: 'ñ'
|
|
105
|
+
- pattern: (ट=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
106
|
+
result: 'ṭ'
|
|
107
|
+
- pattern: (ट़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
108
|
+
result: 't̤'
|
|
109
|
+
- pattern: (ठ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
110
|
+
result: 'ṭh'
|
|
111
|
+
- pattern: (ड=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
112
|
+
result: 'ḍ'
|
|
113
|
+
- pattern: (ड़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
114
|
+
result: 'ṛ'
|
|
115
|
+
- pattern: (ड़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
116
|
+
result: 'ṛ'
|
|
117
|
+
- pattern: (ढ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
118
|
+
result: 'ḍh'
|
|
119
|
+
- pattern: (ढ़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
120
|
+
result: 'ṛh'
|
|
121
|
+
- pattern: (ण=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
122
|
+
result: 'ṇ'
|
|
123
|
+
- pattern: (त=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
124
|
+
result: 't'
|
|
125
|
+
- pattern: (थ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
126
|
+
result: 'th'
|
|
127
|
+
- pattern: (द=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
128
|
+
result: 'd'
|
|
129
|
+
- pattern: (ध=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
130
|
+
result: 'dh'
|
|
131
|
+
- pattern: (न=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
132
|
+
result: 'n'
|
|
133
|
+
- pattern: (प=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
134
|
+
result: 'p'
|
|
135
|
+
- pattern: (फ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
136
|
+
result: 'ph'
|
|
137
|
+
- pattern: (फ़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
138
|
+
result: 'ph'
|
|
139
|
+
- pattern: (ब=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
140
|
+
result: 'b'
|
|
141
|
+
- pattern: (भ=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
142
|
+
result: 'bh'
|
|
143
|
+
- pattern: (म=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
144
|
+
result: 'm'
|
|
145
|
+
- pattern: (य=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
146
|
+
result: 'y'
|
|
147
|
+
- pattern: (र=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
148
|
+
result: 'r'
|
|
149
|
+
- pattern: (ल=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
150
|
+
result: 'l'
|
|
151
|
+
- pattern: (व=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
152
|
+
result: 'v'
|
|
153
|
+
- pattern: (श=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
154
|
+
result: 'ś'
|
|
155
|
+
- pattern: (ष=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
156
|
+
result: 'sh'
|
|
157
|
+
- pattern: (स=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
158
|
+
result: 's'
|
|
159
|
+
- pattern: (स़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
160
|
+
result: 's̤'
|
|
161
|
+
- pattern: (ह=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
162
|
+
result: 'h'
|
|
163
|
+
- pattern: (ह़=?)(?=[\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d])
|
|
164
|
+
result: 'h'
|
|
165
|
+
|
|
76
166
|
# note[3]
|
|
77
167
|
- pattern: \u0902(?=[कक़खख़गग़घङ])
|
|
78
168
|
result: ṅ
|
|
@@ -134,6 +224,7 @@ map:
|
|
|
134
224
|
'ठ': 'ṭha'
|
|
135
225
|
'ड': 'ḍa'
|
|
136
226
|
'ड़': 'ṛa'
|
|
227
|
+
'ड़': 'ṛa'
|
|
137
228
|
'ढ': 'ḍha'
|
|
138
229
|
'ढ़': 'ṛha'
|
|
139
230
|
'ण': 'ṇa'
|
|
@@ -196,6 +287,7 @@ map:
|
|
|
196
287
|
'ै': "ai"
|
|
197
288
|
'ो': "o"
|
|
198
289
|
'्': ""
|
|
290
|
+
'़': ""
|
|
199
291
|
|
|
200
292
|
# digits
|
|
201
293
|
|
|
@@ -40,7 +40,25 @@ notes:
|
|
|
40
40
|
|
|
41
41
|
tests:
|
|
42
42
|
- source: "इस चुनौतीपूर्ण समय में 'वर्क फ्रॉम होम’ सामान्य बन चुका है"
|
|
43
|
-
expected: "
|
|
43
|
+
expected: "is cunăutīpūrṇ samay meṃ 'vark phrôm homa’ sāmāny ban cukā hai"
|
|
44
|
+
- source: "दिल्ली में त्योहार पर खरीददारी करने निकले बड़ी संख्या में लोग, कई जगहों पर लगा भीषण जाम"
|
|
45
|
+
expected: "dillī meṃ tyohār par kharīdadārī karane nikale baṛī saṃkhyā meṃ loga, kaī jagahoṃ par lagā bhīshaṇ jāma"
|
|
46
|
+
- source: "सरकार ने पेंशन भोगियों को लाइफ सर्टिफिकेट जमा कराने के मामले में दी बड़ी राहत"
|
|
47
|
+
expected: "sarakār ne peṃśan bhogiyoṃ ko lāiph sarṭiphikeṭ jamā karāne ke māmale meṃ dī baṛī rāhata"
|
|
48
|
+
- source: "कांग्रेस ने माना उसके लचर प्रदर्शन ने डुबोई महागठबंधन की लुटिया, पार्टी में उठने लगी आत्ममंथन की आवाज"
|
|
49
|
+
expected: "kāṃgres ne mānā usake lacar pradarśan ne ḍuboī mahāgaṭhabandhan kī luṭiyā, pārṭī meṃ uṭhane lagī ātmamanthan kī āvāja"
|
|
50
|
+
- source: "डिजिटल पेमेंट सिस्टम ने छोटे-मध्यम कारोबारों का दिया साथ, कोरोना की परेशानियों को किया कम"
|
|
51
|
+
expected: "ḍijiṭal pemeṃṭ sisṭam ne choṭe-madhyam kārobāroṃ kā diyā sātha, koronā kī pareśāniyoṃ ko kiyā kama"
|
|
52
|
+
- source: "छोटे व्यापारियों को ढूंढें, उनसे खरीदें और उनका साथ दें"
|
|
53
|
+
expected: "choṭe vyāpāriyoṃ ko ḍhūṃḍheṃ, unase kharīdeṃ ăur unakā sāth deṃ"
|
|
54
|
+
- source: "भारत के साथ साझीदारी को महत्व देंगे बाइडन, ओबामा प्रशासन में रहीं वरिष्ठ अधिकारी एलिसा ने जताई उम्मीद"
|
|
55
|
+
expected: "bhārat ke sāth sājhīdārī ko mahatv deṃge bāiḍana, obāmā praśāsan meṃ rahīṃ varishṭh adhikārī elisā ne jatāī ummīda"
|
|
56
|
+
- source: "दो महीने से कोमा में था युवक, चिकन की चर्चा सुनते ही आया होश"
|
|
57
|
+
expected: "do mahīne se komā meṃ thā yuvaka, cikan kī carcā sunate hī āyā hośa"
|
|
58
|
+
- source: "कोरोना के टीके पर खुशखबरी, भारत पहुंची रूसी वैक्सीन की पहली खेप"
|
|
59
|
+
expected: "koronā ke ṭīke par khuśakhabarī, bhārat pahuṃcī rūsī vaiksīn kī pahalī khepa"
|
|
60
|
+
- source: "दिल्ली के गांधी नगर स्थित एक दुकान में लगी भीषण आग, दमकल की 20 गाड़ियां मौके पर"
|
|
61
|
+
expected: "dillī ke gāṃdhī nagar sthit ek dukān meṃ lagī bhīshaṇ āga, damakal kī 20 gāṛiyāṃ măuke para"
|
|
44
62
|
|
|
45
63
|
map:
|
|
46
64
|
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: alalc
|
|
3
|
+
id: 1997
|
|
4
|
+
language: iso-639-2:kan
|
|
5
|
+
source_script: Kana
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Kannada Romanization, 1997
|
|
8
|
+
url: http://catdir.loc.gov/catdir/cpso/romanization/kannada.pdf
|
|
9
|
+
creation_date: 1997
|
|
10
|
+
description: |
|
|
11
|
+
ALA-LC Romanization table for Kannada
|
|
12
|
+
|
|
13
|
+
notes:
|
|
14
|
+
|
|
15
|
+
- Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
|
|
16
|
+
vowels following a consonant can be found in grammars; no distinction between the two is
|
|
17
|
+
made in transliteration.
|
|
18
|
+
|
|
19
|
+
- |
|
|
20
|
+
The vowel a is implicit after all consonants and consonant clusters and is supplied in
|
|
21
|
+
transliteration, with the following exceptions:
|
|
22
|
+
|
|
23
|
+
a) when another vowel is indicated by its appropriate sign; and
|
|
24
|
+
b) when the absence of any vowel is indicated by the superscript sign (◌್).
|
|
25
|
+
|
|
26
|
+
- |
|
|
27
|
+
Exception: Anusvāra is transliterated by:
|
|
28
|
+
|
|
29
|
+
a) ṅ before gutturals,
|
|
30
|
+
b) ñ before palatals,
|
|
31
|
+
c) ṇ before cerebrals,
|
|
32
|
+
d) n before dentals, and
|
|
33
|
+
e) m before labials.
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
tests:
|
|
37
|
+
- source: "ಕರ್ಣಾಟಕ"
|
|
38
|
+
expected: "karṇāṭaka"
|
|
39
|
+
- source: "ಬೆಂಗಳೂರು"
|
|
40
|
+
expected: "beṅgaḷūru"
|
|
41
|
+
- source: "ಉಡುಪಿಯಲ್ಲಿ ಪ್ರಪ್ರಥಮ ಬಾರಿಗೆ ಪ್ರಾರಂಭವಾಗಿರುವ ದೇಶಿ ಉತ್ಪನ್ನಗಳ ಮಳಿಗೆ"
|
|
42
|
+
expected: "uḍupiyalli praprathama bārige prāraṃbhavāgiruva dēśi utpannagaḷa maḷige"
|
|
43
|
+
- source: "ದೇವರ ಹೆಸರು ಬಳಸಿ ಆನ್ಲೈನ್ ಬೆಟ್ಟಿಂಗ್!"
|
|
44
|
+
expected: "dēvara hesaru baḷasi ānlain beṭṭiṃg!"
|
|
45
|
+
- source: "ಚಿಕ್ಕಮಗಳೂರು : ಪುಷ್ಪ ಸಮರ್ಪಣೆ ವೇಳೆ ಮಗಳನ್ನ ನೆನೆದು ಕಣ್ಣೀರಿಟ್ಟ ಮೃತ ಪೇದೆ ತಾಯಿ"
|
|
46
|
+
expected: "cikkamagaḷūru : puṣpa samarpaṇe vēḷe magaḷanna nenedu kaṇṇīriṭṭa mṛta pēde tāyi"
|
|
47
|
+
- source: "ಸ್ವಾಮಿತ್ವ: ಹೊಸ ಯೋಜನೆಯಿಂದ ನಮಗೆ ಏನು ಲಾಭ ?"
|
|
48
|
+
expected: "svāmitva: hosa yōjaneyinda namage ēnu lābha ?"
|
|
49
|
+
- source: "ಮರಳು ಸಾಗಾಣಿಕೆ ವ್ಯವಹಾರ ಆಗಬಾರದು :ಅಧಿಕಾರಿಗಳಿಗೆ ಖಡಕ್ ಸೂಚನೆ ನೀಡಿದ ಜಿಲ್ಲಾಧಿಕಾರಿ"
|
|
50
|
+
expected: "maraḷu sāgāṇike vyavahāra āgabāradu :adhikārigaḷige khaḍak sūcane nīḍida jillādhikāri"
|
|
51
|
+
- source: "ಹಾವೇರಿ ಜಿಲ್ಲೆಯಲ್ಲಿ ೯೭ ಜನರಲ್ಲಿ ಕೋವಿಡ್ ಸೋಂಕು ಪತ್ತೆ ; 54 ಮಂದಿ ಗುಣಮುಖ"
|
|
52
|
+
expected: "hāvēri jilleyalli 97 janaralli kōviḍ sōṃku patte ; 54 maṃdi guṇamukha"
|
|
53
|
+
- source: "ಸಿಂದಗಿ ಐಸಿಐಸಿಐ ಬ್ಯಾಂಕ್ ಸೆಕ್ಯುರಿಟಿ ಗಾರ್ಡ್ ಹತ್ಯೆ ಪ್ರಕರಣ ಭೇದಿಸಿದ ಪೊಲೀಸರು"
|
|
54
|
+
expected: "sindagi aisiaisiai byāṃk sekyuriṭi gārḍ hatye prakaraṇa bhēdisida polīsaru"
|
|
55
|
+
- source: "ಬ್ಯಾಂಕರ್ಗಳೊಂದಿಗೆ ಡಿವಿ ಸಭೆ : ಆಧ್ಯತಾ ವಲಯ, ಸಾಲ ಯೋಜನೆ ತ್ವರಿತ ಮಂಜೂರಿಗೆ ಸೂಚನೆ"
|
|
56
|
+
expected: "byāṅkargaḷoṃdige ḍivi sabhe : ādhyatā valaya, sāla yōjane tvarita maṃjūrige sūcane"
|
|
57
|
+
- source: "ಪೊಲೀಸ್ ಇಲಾಖೆ ಸಮಗ್ರ ಅಭಿವೃದ್ಧಿ; ಡಿಜಿಪಿ ನೇತೃತ್ವದಲ್ಲಿ ಸಮಿತಿ ರಚನೆ: ಬೊಮ್ಮಾಯಿ"
|
|
58
|
+
expected: "polīs ilākhe samagra abhivṛddhi; ḍijipi nētṛtvadalli samiti racane: beūmmāyi"
|
|
59
|
+
- source: "ಕೆಟ್ಟಿರುವ ರಸ್ತೆಗಳ ದುರಸ್ತಿಗೆ ಸರಕಾರದ ಯೋಜನೆ"
|
|
60
|
+
expected: "keṭṭiruva rastegaḷa durastige sarakārada yōjane"
|
|
61
|
+
|
|
62
|
+
map:
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
rules:
|
|
66
|
+
# rule[II]: a consonant followed by a vowel sign or virama is written without the inherent "a"
|
|
67
|
+
- pattern: ([ಕ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
68
|
+
result: 'k'
|
|
69
|
+
- pattern: ([ಖ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
70
|
+
result: 'kh'
|
|
71
|
+
- pattern: ([ಗ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
72
|
+
result: 'g'
|
|
73
|
+
- pattern: ([ಘ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
74
|
+
result: 'gh'
|
|
75
|
+
- pattern: ([ಙ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
76
|
+
result: 'ṅ'
|
|
77
|
+
- pattern: ([ಚ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
78
|
+
result: 'c'
|
|
79
|
+
- pattern: ([ಛ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
80
|
+
result: 'ch'
|
|
81
|
+
- pattern: ([ಜ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
82
|
+
result: 'j'
|
|
83
|
+
- pattern: ([ಝ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
84
|
+
result: 'jh'
|
|
85
|
+
- pattern: ([ಞ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
86
|
+
result: 'ñ'
|
|
87
|
+
- pattern: ([ಟ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
88
|
+
result: 'ṭ'
|
|
89
|
+
- pattern: ([ಠ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
90
|
+
result: 'ṭh'
|
|
91
|
+
- pattern: ([ಡ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
92
|
+
result: 'ḍ'
|
|
93
|
+
- pattern: ([ಢ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
94
|
+
result: 'ḍh'
|
|
95
|
+
- pattern: ([ಣ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
96
|
+
result: 'ṇ'
|
|
97
|
+
- pattern: ([ತ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
98
|
+
result: 't'
|
|
99
|
+
- pattern: ([ಥ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
100
|
+
result: 'th'
|
|
101
|
+
- pattern: ([ದ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
102
|
+
result: 'd'
|
|
103
|
+
- pattern: ([ಧ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
104
|
+
result: 'dh'
|
|
105
|
+
- pattern: ([ನ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
106
|
+
result: 'n'
|
|
107
|
+
- pattern: ([ಪ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
108
|
+
result: 'p'
|
|
109
|
+
- pattern: ([ಫ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
110
|
+
result: 'ph'
|
|
111
|
+
- pattern: ([ಬ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
112
|
+
result: 'b'
|
|
113
|
+
- pattern: ([ಭ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
114
|
+
result: 'bh'
|
|
115
|
+
- pattern: ([ಮ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
116
|
+
result: 'm'
|
|
117
|
+
- pattern: ([ಯ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
118
|
+
result: 'y'
|
|
119
|
+
- pattern: ([ರ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
120
|
+
result: 'r'
|
|
121
|
+
- pattern: ([ಱ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
122
|
+
result: 'ṟ'
|
|
123
|
+
- pattern: ([ಲ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
124
|
+
result: 'l'
|
|
125
|
+
- pattern: ([ಳ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
126
|
+
result: 'ḷ'
|
|
127
|
+
- pattern: ([ೞ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
128
|
+
result: 'l̤'
|
|
129
|
+
- pattern: ([ವ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
130
|
+
result: 'v'
|
|
131
|
+
- pattern: ([ಶ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
132
|
+
result: 'ś'
|
|
133
|
+
- pattern: ([ಷ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
134
|
+
result: 'ṣ'
|
|
135
|
+
- pattern: ([ಸ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
136
|
+
result: 's'
|
|
137
|
+
- pattern: ([ಹ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
|
|
138
|
+
result: 'h'
|
|
139
|
+
|
|
140
|
+
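# rule[III]: anusvāra becomes the class nasal of the following consonant
# NOTE(review): no rule here covers labials (note e: m before labials) — confirm whether that is intended.
# NOTE(review): presumably rules apply in listed order, so a consonant already rewritten by rule II no longer matches these lookaheads (cf. expected "sōṃku") — verify.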
|
|
141
|
+
- pattern: \u0C82(?=[ಕಖಗಘಙ])
|
|
142
|
+
result: ṅ
|
|
143
|
+
- pattern: \u0C82(?=[ಚಛಜಝಞ])
|
|
144
|
+
result: ñ
|
|
145
|
+
- pattern: \u0C82(?=[ಟಠಡಢಣ])
|
|
146
|
+
result: ṇ
|
|
147
|
+
- pattern: \u0C82(?=[ತಥದಧನ])
|
|
148
|
+
result: n
|
|
149
|
+
|
|
150
|
+
characters:
|
|
151
|
+
'ಅ': 'a'
|
|
152
|
+
'ಆ': 'ā'
|
|
153
|
+
'ಇ': 'i'
|
|
154
|
+
'ಈ': 'ī'
|
|
155
|
+
'ಉ': 'u'
|
|
156
|
+
'ಊ': 'ū'
|
|
157
|
+
'ಋ': 'ṛ'
|
|
158
|
+
'ೠ': 'ṝ'
|
|
159
|
+
|
|
160
|
+
'ಌ': 'ḻ'
|
|
161
|
+
|
|
162
|
+
'ಎ': 'e'
|
|
163
|
+
'ಏ': 'ē'
|
|
164
|
+
'ಐ': 'ai'
|
|
165
|
+
|
|
166
|
+
'ಒ': 'o'
|
|
167
|
+
'ಓ': 'ō'
|
|
168
|
+
'ಔ': 'au'
|
|
169
|
+
|
|
170
|
+
# Gutturals
|
|
171
|
+
'ಕ': 'ka'
|
|
172
|
+
'ಖ': 'kha'
|
|
173
|
+
'ಗ': 'ga'
|
|
174
|
+
'ಘ': 'gha'
|
|
175
|
+
'ಙ': 'ṅa'
|
|
176
|
+
|
|
177
|
+
# Palatals
|
|
178
|
+
'ಚ': 'ca'
|
|
179
|
+
'ಛ': 'cha'
|
|
180
|
+
'ಜ': 'ja'
|
|
181
|
+
'ಝ': 'jha'
|
|
182
|
+
'ಞ': 'ña'
|
|
183
|
+
|
|
184
|
+
# Cerebrals
|
|
185
|
+
'ಟ': 'ṭa'
|
|
186
|
+
'ಠ': 'ṭha'
|
|
187
|
+
'ಡ': 'ḍa'
|
|
188
|
+
'ಢ': 'ḍha'
|
|
189
|
+
'ಣ': 'ṇa'
|
|
190
|
+
|
|
191
|
+
# Dentals
|
|
192
|
+
'ತ': 'ta'
|
|
193
|
+
'ಥ': 'tha'
|
|
194
|
+
'ದ': 'da'
|
|
195
|
+
'ಧ': 'dha'
|
|
196
|
+
'ನ': 'na'
|
|
197
|
+
|
|
198
|
+
# Labials
|
|
199
|
+
'ಪ': 'pa'
|
|
200
|
+
'ಫ': 'pha'
|
|
201
|
+
'ಬ': 'ba'
|
|
202
|
+
'ಭ': 'bha'
|
|
203
|
+
'ಮ': 'ma'
|
|
204
|
+
|
|
205
|
+
# Semivowels
|
|
206
|
+
'ಯ': 'ya'
|
|
207
|
+
'ರ': 'ra'
|
|
208
|
+
'ಱ': 'ṟa'
|
|
209
|
+
'ಲ': 'la'
|
|
210
|
+
'ಳ': 'ḷa'
|
|
211
|
+
'ೞ': 'l̤a'
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
'ವ': 'va'
|
|
215
|
+
|
|
216
|
+
# Sibilants
|
|
217
|
+
'ಶ': 'śa'
|
|
218
|
+
'ಷ': 'ṣa'
|
|
219
|
+
'ಸ': 'sa'
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# Aspirate
|
|
223
|
+
'ಹ': 'ha'
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
# Bisarga
|
|
227
|
+
'ಃ': 'ḥ'
|
|
228
|
+
|
|
229
|
+
# Anusvāra
|
|
230
|
+
'ಂ': 'ṃ'
|
|
231
|
+
|
|
232
|
+
"\u0cbc": '' # nukta (double-quoted so that YAML expands the \u escape)
|
|
233
|
+
|
|
234
|
+
# Medials (needed for connecting consonants)
|
|
235
|
+
'ಾ': "ā"
|
|
236
|
+
'ಿ': "i"
|
|
237
|
+
'ೀ': "ī"
|
|
238
|
+
'ು': "u"
|
|
239
|
+
'ೂ': "ū"
|
|
240
|
+
'ೃ': "ṛ"
|
|
241
|
+
'ೄ': "ṝ"
|
|
242
|
+
"\u0CE2": 'ḻ' # KANNADA VOWEL SIGN VOCALIC L ( ೢ); double-quoted so that YAML expands the \u escape
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
'ೆ': "e"
|
|
246
|
+
'ೇ': "ē"
|
|
247
|
+
'ೈ': "ai"
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
'ೊ': 'o'
|
|
251
|
+
'ೋ': 'ō'
|
|
252
|
+
'ೌ': 'au'
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
'्': ''
|
|
256
|
+
'़': ''
|
|
257
|
+
'್': '' # used for pronunciation without vowel
|
|
258
|
+
"": '' # no need for zero with joiner
|
|
259
|
+
"": '' # no need for zero with non joiner
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
# Digits
|
|
264
|
+
|
|
265
|
+
'೦': '0'
|
|
266
|
+
'೧': '1'
|
|
267
|
+
'೨': '2'
|
|
268
|
+
'೩': '3'
|
|
269
|
+
'೪': '4'
|
|
270
|
+
'೫': '5'
|
|
271
|
+
'೬': '6'
|
|
272
|
+
'೭': '7'
|
|
273
|
+
'೮': '8'
|
|
274
|
+
'೯': '9'
|