interscript 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
- data/lib/interscript.rb +5 -1
- data/lib/interscript/fs.rb +3 -1
- data/lib/interscript/mapping.rb +2 -2
- data/lib/interscript/opal.rb +5 -1
- data/lib/interscript/opal/maps.js.erb +7 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +1 -1
- data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
- data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
- data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
- data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +1 -1
- data/maps/{alalc-bel-cyrl-latn-1997.yaml → alalc-bel-Cyrl-Latn-1997.yaml} +2 -2
- data/maps/alalc-ell-Grek-Latn-1997.yaml +2 -3
- data/maps/alalc-ell-Grek-Latn-2010.yaml +2 -3
- data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -2
- data/maps/alalc-kor-Hang-Latn-1997.yaml +1 -1
- data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
- data/maps/{alalc-mkd-cyrl-latn-1997.yaml → alalc-mkd-Cyrl-Latn-1997.yaml} +0 -0
- data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +1 -2
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +2 -2
- data/maps/{alalc-srp-cyrl-latn-2013.yaml → alalc-srp-Cyrl-Latn-2013.yaml} +0 -0
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -1
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -2
- data/maps/bgn-kor-Hang-Latn-1943.yaml +1 -1
- data/maps/bgn-kor-Kore-Latn-1943.yaml +1 -1
- data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +5 -5
- data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} +0 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +3 -4
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -1
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -1
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +17 -17
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +2 -2
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +2 -2
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
- data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
- data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -1
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -1
- data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
- data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
- data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
- data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
- data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
- data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
- data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
- data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
- data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +4 -4
- data/maps/by-bel-Cyrl-Latn-2007.yaml +3 -3
- data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +4 -5
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +4 -5
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -1
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -1
- data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -1
- data/maps/{gki-bel-cyrl-latn-1992.yaml → gki-bel-Cyrl-Latn-1992.yaml} +1 -1
- data/maps/{gki-bel-cyrl-latn-2000.yaml → gki-bel-Cyrl-Latn-2000.yaml} +1 -1
- data/maps/{gost-rus-cyrl-latn-16876-71-1983.yaml → gost-rus-Cyrl-Latn-16876-71-1983.yaml} +1 -1
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -5
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -4
- data/maps/icao-per-Arab-Latn-9303.yaml +0 -1
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -1
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -1
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +4 -5
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +1 -2
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -1
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
- data/maps/kp-kor-Hang-Latn-2002.yaml +4 -4
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +2 -2
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +4 -4
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +4 -4
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +1 -2
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +4 -4
- data/maps/nil-kor-Hang-Hang-jamo.yaml +3 -3
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +3 -3
- data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +0 -1
- data/maps/{odni-mkd-cyrl-latn-2015.yaml → odni-mkd-Cyrl-Latn-2015.yaml} +0 -0
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +2 -2
- data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -2
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1968.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1999.yaml +3 -3
- data/maps/{ses-ara-arab-latn-1930.yaml → ses-ara-Arab-Latn-1930.yaml} +7 -3
- data/maps/un-ara-Arab-Latn-1971.yaml +16 -4
- data/maps/un-ara-Arab-Latn-1972.yaml +14 -7
- data/maps/un-ara-Arab-Latn-2017.yaml +56 -19
- data/maps/un-bel-Cyrl-Latn-2007.yaml +3 -3
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +1 -2
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -1
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +42 -42
- data/maps/un-mon-Mong-Latn-2013.yaml +9 -3
- data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
- data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
- data/maps/{un-ukr-cyrl-latn-1998.yaml → un-ukr-Cyrl-Latn-1998.yaml} +1 -1
- data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
- data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -2
- data/maps/var-tha-Thai-Thai-phonemic.yaml +5 -5
- data/maps/var-tha-Thai-Zsym-ipa.yaml +12 -12
- data/maps/var-zho-Hani-Latn-1979.yaml +7 -7
- metadata +41 -15
@@ -0,0 +1,175 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bis
|
3
|
+
id: 1991
|
4
|
+
language: pnj
|
5
|
+
source_script: Guru
|
6
|
+
destination_script: Latn
|
7
|
+
name: Indian script code for information interchange - ISCII - Panjabi(Gurmukhi) Romanization
|
8
|
+
#url:
|
9
|
+
creation_date: 1991
|
10
|
+
description: |
|
11
|
+
IS 13194 (1991): Indian script code for information
|
12
|
+
interchange - ISCII [LITD 20: Indian Language Technologies
|
13
|
+
and Products]
|
14
|
+
|
15
|
+
notes:
|
16
|
+
- |
|
17
|
+
Exception: Anusvāra is transliterated by:
|
18
|
+
|
19
|
+
a) ṅ before gutturals,
|
20
|
+
b) ñ before palatals,
|
21
|
+
c) ṇ before cerebrals,
|
22
|
+
d) n before dentals, and
|
23
|
+
e) m before labials.
|
24
|
+
|
25
|
+
tests:
|
26
|
+
- source: "ਸਦਾ ਜਵਾਨ ਰਹੋ"
|
27
|
+
expected: "sdā jvāṉ rhō"
|
28
|
+
- source: "ਸਵਾਮੀ ਅਗਨੀਵੇਸ਼ ਦੀ ਮੌਤ"
|
29
|
+
expected: "svāmī agṉīvēs dī maut"
|
30
|
+
- source: "ਇਸ ਲਈ ਖੱਟੀ ਤਲੀ ਚੀਜ਼ , ਫਾਸਟ ਫੁੂਟ, ਤੇਜ਼ ਮਿਰਚ ਮਸਾਲਿਆਂ ਨੂੰ ਛੱਡਕੇ ਚੰਗੇ ਤੇ ਮੌਸਮੀ ਫਲ, ਹਰੀਆਂ ਸ਼ਬਜ਼ੀਆਂ, ਘਰ ਦੀ ਲੱਸੀ ਤੇ ਦਹੀਂ ਤੇ ਔਰਗੈਨਿਕ ਚੀਜ਼ਾਂ ਹੀ ਅਪਣਾਉ"
|
31
|
+
expected: "is lī khṭī tlī cīz , phāsṭ phuūṭ, tēz mirc msāliān ṉūṃ chḍkē cṅgē tē mausmī phl, hrīān śbzīān, ghr dī lsī tē dhīn tē aurgaiṉik cīzān hī apṇāu"
|
32
|
+
- source: "ਤੁਹਾਡਾ ਚਿਹਰਾ ਸਦਾ ਚੜ੍ਹਦੀ ਕਲਾ ‘ਚ ਰਹਿਣਾ ਤੇ ਹਮੇਸ਼ਾ ਖਿੜੀਆਂ ਰਹਿਣਾ ਤੇ ਠਾਠਾਂ ਮਾਰਦਾ ਸਰੀਰ ਹੀ ਤੁਹਾਡੇ ਜਵਾਨ ਰਹਿਣ ਦੀ ਨਿਸ਼ਾਨੀ ਹੈ"
|
33
|
+
expected: "tuhāḍā cihrā sdā cṛahdī klā ‘c rhiṇā tē hmēśā khiṛaīān rhiṇā tē ṭhāṭhān mārdā srīr hī tuhāḍē jvāṉ rhiṇ dī ṉiśāṉī hai"
|
34
|
+
- source: "ਧੌਲੇ ਆਉਣਾ ਜਾਂ ਕਹਿ ਲਵੋ, ਵਾਲ ਚਿੱਟੇ ਹੋਣਾ ਬੁੱਢਾਪਾ ਨਹੀਂ ਹੈ।ਪਰ ਉਮਰ ਤਂੋ ਪਹਿਲਾਂ ਮੂੰਹ ਤੋਂ ਨੂਰ ਉੜ ਜਾਣਾ,ਹਮੇਸ਼ਾਂ ਥੱਕਿਆ-2 ਰਹਿਣਾ ,ਬਿਮਾਰ ਰਹਿਣਾ ਅਸਲ ਬੁਢਾਪਾ ਹੈ"
|
35
|
+
expected: "dhaulē āuṇā jān khi lvō, vāl ciṭē hōṇā buḍhāpā ṉhīn hai.pr umr tnō philān mūṃh tōn ṉūr uṛa jāṇā,hmēśān thkiā-2 rhiṇā ,bimār rhiṇā asl buḍhāpā hai"
|
36
|
+
- source: "ਇਸ ਲਈ ਇਹ ਆਪਾਂ ਨੂੰ ਦੇਖਣਾ ਪਵੇਗਾ ਕਿ ਆਪਾਂ ਕਿਵੇਂ ਤੰਦਰੁਸਤ ਰਹਿਣਾ ਹੈ ਤੇ ਨਿਰੋਗ ਰਹਿਣਾ ਹੈ"
|
37
|
+
expected: "is lī ih āpān ṉūṃ dēkhṇā pvēgā ki āpān kivēn tndrust rhiṇā hai tē ṉirōg rhiṇā hai"
|
38
|
+
- source: "ਸਮਾਜਿਕ ਕਾਰਕੁੰਨ ਸਵਾਮੀ ਅਗਨੀਵੇਸ ਦਾ ਅੱਜ ਸ਼ਾਮੀਂ ਦਿਹਾਂਤ ਹੋ ਗਿਆ"
|
39
|
+
expected: "smājik kārkunṉ svāmī agṉīvēs dā aj sāmīn dihānt hō giā"
|
40
|
+
- source: "ਹਰ ਇਨਸਾਨ ਸਦਾ ਸਵਸਥ ਤੇ ਜਵਾਨ ਰਹਿਣਾ ਚਾਹੁੰਦਾ ਹੈ"
|
41
|
+
expected: "hr iṉsāṉ sdā svsth tē jvāṉ rhiṇā cāhundā hai"
|
42
|
+
- source: "ਜਨਮ ਲੈਣਾ ਤੇ ਮੌਤ ਇੱਕ ਅਟਲ ਸੱਚਾਈ ਹੈ"
|
43
|
+
expected: "jṉm laiṇā tē maut ik aṭl scāī hai"
|
44
|
+
- source: "ਇਸਦੇ ਉਲਟ ਤੁਹਾਡੀ ਗਲਤ ਜੀਵਨ ਸ਼ੈਲੀ, ਗਲਤ ਖਾਣਾ ਤੇ ਹਮੇਸ਼ਾ ਕੀਮਤੀ ਸਰੀਰ ਪ੍ਰਤੀ ਲਾਪਰਵਾਹੀ ਕਦੇ ਵੀ ਤੁਹਾਨੁੂੰ ਜਵਾਨ ਤੇ ਨਿਰੋਗੀ ਨਹੀਂ ਰੱਖ ਸਕਦੀ"
|
45
|
+
expected: "isdē ulṭ tuhāḍī glt jīvṉ śailī, glt khāṇā tē hmēśā kīmtī srīr prtī lāprvāhī kdē vī tuhāṉuūṃ jvāṉ tē ṉirōgī ṉhīn rkh skdī"
|
46
|
+
|
47
|
+
map:
|
48
|
+
|
49
|
+
rules:
|
50
|
+
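# Contextual rules for the nasal sign (see the "notes" section above): it is romanized according to the class of the consonant that follows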
|
51
|
+
- pattern: \u0A70(?=[ਕਖਖ਼ਗਗ਼ਘਙ]) # ੰ before gutturals
|
52
|
+
result: ṅ
|
53
|
+
- pattern: \u0A70(?=[ਚਛਜਜ਼ਝਞ]) # ੰ before palatals
|
54
|
+
result: ñ
|
55
|
+
- pattern: \u0A70(?=[ਟਠਡਢਣ]) # ੰ before cerebrals
|
56
|
+
result: ṇ
|
57
|
+
- pattern: \u0A70(?=[ਤਥਦਧਨ]) # ੰ before dentals
|
58
|
+
result: n
|
59
|
+
- pattern: \u0A70(?=[ਪਫਬਭਮ])
|
60
|
+
result: m
|
61
|
+
|
62
|
+
|
63
|
+
characters:
|
64
|
+
'ਅ': 'a'
|
65
|
+
'ਆ': 'ā'
|
66
|
+
'ਇ': 'i'
|
67
|
+
'ਈ': 'ī'
|
68
|
+
'ਉ': 'u'
|
69
|
+
'ਊ': 'ū'
|
70
|
+
'ੲ': 'ṛ'
|
71
|
+
'ੳ': 'ṝ'
|
72
|
+
'ਏ': 'ē'
|
73
|
+
'ਐ': 'ai'
|
74
|
+
|
75
|
+
'ਓ': 'ŏ'
|
76
|
+
'ਔ': 'au'
|
77
|
+
|
78
|
+
# II. Consonants (see Note 2)
|
79
|
+
# Gutturals
|
80
|
+
'ਕ': 'k'
|
81
|
+
'ਖ': 'kh'
|
82
|
+
'ਗ': 'g'
|
83
|
+
'ਘ': 'gh'
|
84
|
+
'ਙ': 'ṅ'
|
85
|
+
|
86
|
+
# Palatals
|
87
|
+
'ਚ': 'c'
|
88
|
+
'ਛ': 'ch'
|
89
|
+
'ਜ': 'j'
|
90
|
+
'ਝ': 'jh'
|
91
|
+
'ਞ': 'ñ'
|
92
|
+
|
93
|
+
# Cerebrals
|
94
|
+
'ਟ': 'ṭ'
|
95
|
+
'ਠ': 'ṭh'
|
96
|
+
'ਡ': 'ḍ'
|
97
|
+
'ਢ': 'ḍh'
|
98
|
+
'ਣ': 'ṇ'
|
99
|
+
|
100
|
+
# Dentals
|
101
|
+
'ਤ': 't'
|
102
|
+
'ਥ': 'th'
|
103
|
+
'ਦ': 'd'
|
104
|
+
'ੜ': 'ṛa'
|
105
|
+
'ਧ': 'dh'
|
106
|
+
'ਨ': 'ṉ'
|
107
|
+
|
108
|
+
# Labials
|
109
|
+
'ਪ': 'p'
|
110
|
+
'ਫ': 'ph'
|
111
|
+
'ਬ': 'b'
|
112
|
+
'ਭ': 'bh'
|
113
|
+
'ਮ': 'm'
|
114
|
+
|
115
|
+
# Semivowels
|
116
|
+
'ਯ': 'y'
|
117
|
+
#'य़': 'ẏ'
|
118
|
+
'ਰ': 'r'
|
119
|
+
#'ऱ': 'ṟ'
|
120
|
+
'ਲ': 'l'
|
121
|
+
'ਲ਼': 'ḷ'
|
122
|
+
#'ऴ': 'ẕ'
|
123
|
+
|
124
|
+
|
125
|
+
'ਵ': 'v'
|
126
|
+
|
127
|
+
# Sibilants
|
128
|
+
'ਸ਼': 'ś'
|
129
|
+
#'ष': 'ṣ'
|
130
|
+
'ਸ': 's'
|
131
|
+
|
132
|
+
|
133
|
+
# Aspirate
|
134
|
+
'ਹ': 'h'
|
135
|
+
|
136
|
+
|
137
|
+
# Nukta consonants
|
138
|
+
|
139
|
+
'क़': 'q'
|
140
|
+
'ਖ਼': 'ḵẖ'
|
141
|
+
'ਗ਼': 'gẖ'
|
142
|
+
'ਜ਼': 'z'
|
143
|
+
'ਫ਼': 'f'
|
144
|
+
|
145
|
+
# Chandrabindu
|
146
|
+
'ँ': 'm'
|
147
|
+
|
148
|
+
# Bisarga
|
149
|
+
'ः ': 'ḥ'
|
150
|
+
'ਂ': 'n'
|
151
|
+
|
152
|
+
# Anusvāra
|
153
|
+
'ੰ': 'ṃ'
|
154
|
+
|
155
|
+
# Medials # Needed for connecting consonants
|
156
|
+
'ਾ': "ā"
|
157
|
+
'ਿ': "i"
|
158
|
+
'ੀ': "ī"
|
159
|
+
'ੁ': "u"
|
160
|
+
'ੂ': "ū"
|
161
|
+
'ृ': "ṛ"
|
162
|
+
|
163
|
+
|
164
|
+
'ੇ': "ē"
|
165
|
+
'ੈ': "ai"
|
166
|
+
|
167
|
+
|
168
|
+
'ੋ': 'ō'
|
169
|
+
'ੌ': 'au'
|
170
|
+
|
171
|
+
'੍': ''
|
172
|
+
'਼': ''
|
173
|
+
'ੱ': ''
|
174
|
+
'।': '.'
|
175
|
+
"": '' # no need for zero with joiner
|
@@ -0,0 +1,170 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bis
|
3
|
+
id: 1991
|
4
|
+
language: tel
|
5
|
+
source_script: Telu
|
6
|
+
destination_script: Latn
|
7
|
+
name: Indian script code for information interchange - ISCII - Telugu Romanization
|
8
|
+
#url:
|
9
|
+
creation_date: 1991
|
10
|
+
description: |
|
11
|
+
IS 13194 (1991): Indian script code for information
|
12
|
+
interchange - ISCII [LITD 20: Indian Language Technologies
|
13
|
+
and Products]
|
14
|
+
|
15
|
+
notes:
|
16
|
+
- |
|
17
|
+
Exception: Anusvāra is transliterated by:
|
18
|
+
|
19
|
+
a) ṅ before gutturals,
|
20
|
+
b) ñ before palatals,
|
21
|
+
c) ṇ before cerebrals,
|
22
|
+
d) n before dentals, and
|
23
|
+
e) m before labials.
|
24
|
+
|
25
|
+
tests:
|
26
|
+
- source: "ఇప్పుడు ఇదే కోవలో టాలీవుడ్లో మరో మల్టీస్టారర్ రూపొందనుందని సినీ వర్గాల్లో వార్తలు వినిపిస్తున్నాయి"
|
27
|
+
expected: "ippuḍu idē kŏvlŏ ṭālīvuḍlŏ mrŏ mlṭīsṭārr rūpondnundni sinī vrgāllŏ vārtlu vinipistunnāyi"
|
28
|
+
- source: "అంటే ఉంటాయి, అయితే అవి చాలా పెద్దవై ఉండాల్సిన అవసరం లేదు అంటున్నారు మమ్ముట్టి"
|
29
|
+
expected: "aṇṭē uṇṭāyi, ayitē avi cālā peddvai uṇḍālsin avsrṃ lēdu aṇṭunnāru mmmuṭṭi"
|
30
|
+
- source: "ఆ సంతోషాన్ని అభిమానులతో పంచుకున్నారు"
|
31
|
+
expected: "ā sntŏṣānni abhimānultŏ pñcukunnāru"
|
32
|
+
- source: "కెమెరాను అన్బాక్స్ చేసే వీడియోను సోషల్ మీడియాలో అభిమానులతో పంచుకున్నారు"
|
33
|
+
expected: "kemerānu anbāks cēsē vīḍiyŏnu sŏṣl mīḍiyālŏ abhimānultŏ pñcukunnāru"
|
34
|
+
- source: "ఇన్నాళ్లకు నిజమయింది. ఇక ఇప్పటి నుంచి దీంతో ఫొటోలు క్లిక్ మనిపిస్తా’’ అని ఆ వీడియోలో పేర్కొన్నారు"
|
35
|
+
expected: "innāḷlku nijmyindi. ik ippṭi nuñci dīntŏ phoṭŏlu klik mnipistā’’ ani ā vīḍiyŏlŏ pērkonnāru"
|
36
|
+
- source: "గవర్నర్తో కంగనా భేటీ"
|
37
|
+
expected: "gvrnrtŏ kṅgnā bhēṭī"
|
38
|
+
- source: "శ్రియ సినిమా సెట్లో అడుగుపెట్టి ఆరు నెలలు కావొస్తోంది"
|
39
|
+
expected: "śriy sinimā seṭlŏ aḍugupeṭṭi āru nellu kāvostŏndi"
|
40
|
+
- source: "ఇప్పుడు తను కోరుకున్న కెమెరా చేతికి రావడంతో త్వరలో మమ్ముట్టి నుంచి స్టన్నింగ్ ఫొటోస్ రావడం ఖాయం అంటున్నారు ఆయన అభిమానులు"
|
41
|
+
expected: "ippuḍu tnu kŏrukunn kemerā cētiki rāvḍntŏ tvrlŏ mmmuṭṭi nuñci sṭnniṅg phoṭŏs rāvḍṃ khāyṃ aṇṭunnāru āyn abhimānulu"
|
42
|
+
- source: "ఇప్పుడు ఆ వీడియో వైరల్ అయింది. ‘ఆ కెమెరాను కొనాలనేది చాలాకాలంగా నా కల."
|
43
|
+
expected: "ippuḍu ā vīḍiyŏ vairl ayindi. ‘ā kemerānu konālnēdi cālākālṅgā nā kl."
|
44
|
+
- source: "మరో వైపు ఎన్టీఆర్, రామ్చరణ్ కలిసి ట్రిపుల్ ఆర్ సినిమాలో నటిస్తున్నారు"
|
45
|
+
expected: "mrŏ vaipu enṭīār, rāmcrṇ klisi ṭripul ār sinimālŏ nṭistunnāru"
|
46
|
+
map:
|
47
|
+
|
48
|
+
rules:
|
49
|
+
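# Contextual rules for the anusvāra (see the "notes" section above): it is romanized according to the class of the consonant that follows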
|
50
|
+
- pattern: \u0C02(?=[కఖగఘఙ])
|
51
|
+
result: ṅ
|
52
|
+
- pattern: \u0C02(?=[చఛజఝఞ])
|
53
|
+
result: ñ
|
54
|
+
- pattern: \u0C02(?=[టఠడఢణ])
|
55
|
+
result: ṇ
|
56
|
+
- pattern: \u0C02(?=[తథదధన])
|
57
|
+
result: n
|
58
|
+
- pattern: \u0C02(?=[పఫబభమ])
|
59
|
+
result: m
|
60
|
+
|
61
|
+
|
62
|
+
characters:
|
63
|
+
'అ': 'a'
|
64
|
+
'ఆ': 'ā'
|
65
|
+
'ఇ': 'i'
|
66
|
+
'ఈ': 'ī'
|
67
|
+
'ఉ': 'u'
|
68
|
+
'ఊ': 'ū'
|
69
|
+
'ఋ': 'ṛ'
|
70
|
+
'ఌ': 'ḻ'
|
71
|
+
'ౡ': 'ḻ'
|
72
|
+
'ఎ': 'e'
|
73
|
+
'ఏ': 'ē'
|
74
|
+
'ఐ': 'ai'
|
75
|
+
'ఒ': 'o'
|
76
|
+
'ఓ': 'ŏ'
|
77
|
+
'ఔ': 'au'
|
78
|
+
|
79
|
+
# II. Consonants (see Note 2)
|
80
|
+
# Gutturals క ఖ గ ఘ ఙ
|
81
|
+
'క': 'k'
|
82
|
+
'ఖ': 'kh'
|
83
|
+
'గ': 'g'
|
84
|
+
'ఘ': 'gh'
|
85
|
+
'ఙ': 'ṅ'
|
86
|
+
|
87
|
+
# Palatals చ ఛ జ ఝ ఞ
|
88
|
+
'చ': 'c'
|
89
|
+
'ఛ': 'ch'
|
90
|
+
'జ': 'j'
|
91
|
+
'ఝ': 'jh'
|
92
|
+
'ఞ': 'ñ'
|
93
|
+
|
94
|
+
# Cerebrals ట ఠ డ ఢ ణ
|
95
|
+
'ట': 'ṭ'
|
96
|
+
'ఠ': 'ṭh'
|
97
|
+
'డ': 'ḍ'
|
98
|
+
'ఢ': 'ḍh'
|
99
|
+
'ణ': 'ṇ'
|
100
|
+
|
101
|
+
# Dentals త థ ద ధ న
|
102
|
+
'త': 't'
|
103
|
+
#'ৎ': 't'
|
104
|
+
'థ': 'th'
|
105
|
+
'ద': 'd'
|
106
|
+
'ధ': 'dh'
|
107
|
+
'న': 'n'
|
108
|
+
|
109
|
+
# Labials ప ఫ బ భ మ
|
110
|
+
'ప': 'p'
|
111
|
+
'ఫ': 'ph'
|
112
|
+
'బ': 'b'
|
113
|
+
'భ': 'bh'
|
114
|
+
'మ': 'm'
|
115
|
+
|
116
|
+
# Semivowels య ర ల వ
|
117
|
+
'య': 'y'
|
118
|
+
'ర': 'r'
|
119
|
+
'ఱ': 'ṛ'
|
120
|
+
'ల': 'l'
|
121
|
+
'ళ': 'ḷ'
|
122
|
+
'వ': 'v'
|
123
|
+
|
124
|
+
# Sibilants శ ష స హ
|
125
|
+
'శ': 'ś'
|
126
|
+
'ష': 'ṣ'
|
127
|
+
'స': 's'
|
128
|
+
|
129
|
+
|
130
|
+
|
131
|
+
# Aspirate
|
132
|
+
'హ': 'h'
|
133
|
+
|
134
|
+
|
135
|
+
'క్ష': 'kṣa'
|
136
|
+
|
137
|
+
|
138
|
+
# Chandrabindu
|
139
|
+
'ঁ': 'm'
|
140
|
+
|
141
|
+
# Bisarga
|
142
|
+
'ః': 'ḥ'
|
143
|
+
|
144
|
+
# Anusvāra
|
145
|
+
'ం': 'ṃ'
|
146
|
+
|
147
|
+
# Medials # Needed for connecting consonants
|
148
|
+
|
149
|
+
'ా': 'ā'
|
150
|
+
'ి': 'i'
|
151
|
+
'ీ': 'ī'
|
152
|
+
'ు': 'u'
|
153
|
+
'ూ': 'ū'
|
154
|
+
'ృ': 'ṛ'
|
155
|
+
'ె': 'e'
|
156
|
+
'ే': 'ē'
|
157
|
+
'ై': 'ai'
|
158
|
+
'ొ': 'o'
|
159
|
+
'ో': 'ŏ'
|
160
|
+
'ౌ': 'au'
|
161
|
+
'\u09CD': '' # Used for joining
|
162
|
+
|
163
|
+
|
164
|
+
'\u0c4d': '' # virama sign (halanta)
|
165
|
+
'ౕ ': ''
|
166
|
+
'ౖ ': ''
|
167
|
+
'्': ''
|
168
|
+
'़': ''
|
169
|
+
"": ''# Used for joining
|
170
|
+
"": ''# Used for non joining
|
@@ -0,0 +1,155 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bis
|
3
|
+
id: 1991
|
4
|
+
language: tml
|
5
|
+
source_script: Taml
|
6
|
+
destination_script: Latn
|
7
|
+
name: Indian script code for information interchange - ISCII - Tamil Romanization
|
8
|
+
#url:
|
9
|
+
creation_date: 1991
|
10
|
+
description: |
|
11
|
+
IS 13194 (1991): Indian script code for information
|
12
|
+
interchange - ISCII [LITD 20: Indian Language Technologies
|
13
|
+
and Products]
|
14
|
+
|
15
|
+
notes:
|
16
|
+
- |
|
17
|
+
Exception: Anusvāra is transliterated by:
|
18
|
+
|
19
|
+
a) ṅ before gutturals,
|
20
|
+
b) ñ before palatals,
|
21
|
+
c) ṇ before cerebrals,
|
22
|
+
d) n before dentals, and
|
23
|
+
e) m before labials.
|
24
|
+
|
25
|
+
tests:
|
26
|
+
- source: "இளைஞர்களின் உறுதியான மனநிலையை பிரதிபலிக்கிறது: நீட் தேர்வில் 85-90 சதவீத மாணவர்கள் பங்கேற்பு - ரமேஷ் பொக்ரியால்"
|
27
|
+
expected: "iḷaiñrkḷiṉ uṟutiyāṉ mṉnilaiyai pirtiplikkiṟtu: nīṭ tērvil 85-90 ctvīt māṇvrkḷ pṅkēṟpu - rmēṣ pokriyāl"
|
28
|
+
- source: "நாடாளுமன்றத்தில் 4 மசோதாக்களை எதிர்க்க காங்கிரஸ் முடிவு - ஜெயராம் ரமேஷ்"
|
29
|
+
expected: "nāṭāḷumṉṟttil 4 mcōtākkḷai etirkk kāṅkirs muṭivu - jeyrām rmēṣ"
|
30
|
+
- source: "கர்நாடகாவில் மேலும் 9,894 பேருக்கு கொரோனா தொற்று உறுதி"
|
31
|
+
expected: "krnāṭkāvil mēlum 9,894 pērukku korōṉā toṟṟu uṟuti"
|
32
|
+
- source: "ஐதராபாத்துக்கு கைகொடுக்குமா அதிரடி?"
|
33
|
+
expected: "aitrāpāttukku kaikoṭukkumā atirṭi?"
|
34
|
+
- source: "அமெரிக்க ஓபன் டென்னிஸ்: இறுதிப்போட்டியில் டொமினிக்-ஸ்வெரேவ்"
|
35
|
+
expected: "amerikk ŏpṉ ṭeṉṉis: iṟutippōṭṭiyil ṭomiṉik-sverēv"
|
36
|
+
- source: "ஐ.பி.எல். கிரிக்கெட்டில் களம் இறங்கும் அமெரிக்க வீரர்"
|
37
|
+
expected: "ai.pi.el. kirikkeṭṭil kḷm iṟṅkum amerikk vīrr"
|
38
|
+
- source: "அமெரிக்க ஓபன் டென்னிஸ்; நவோமி ஒசாகா சாம்பியன் பட்டம் வென்றார்"
|
39
|
+
expected: "amerikk ŏpṉ ṭeṉṉis; nvōmi ocākā cāmpiyṉ pṭṭm veṉṟār"
|
40
|
+
- source: "புதிய கல்விக்கொள்கைக்கு எதிர்ப்பு: முன்னாள் துணைவேந்தர்கள் 20 பேர் பிரதமருக்கு கடிதம்"
|
41
|
+
expected: "putiy klvikkoḷkaikku etirppu: muṉṉāḷ tuṇaivēntrkḷ 20 pēr pirtmrukku kṭitm"
|
42
|
+
- source: "இந்த ஆண்டு ஐ.பி.எல். கோப்பையை எந்த அணி வெல்லும்? - கெவின் பீட்டர்சன் கணிப்பு"
|
43
|
+
expected: "int āṇṭu ai.pi.el. kōppaiyai ent aṇi vellum? - keviṉ pīṭṭrcṉ kṇippu"
|
44
|
+
- source: "இந்திய எண்ணெய் கப்பலில் தீ: விபத்து குறித்த எச்சரிக்கையை கப்பல் அதிகாரிகள் புறக்கணித்தனர் - இலங்கை கோர்ட்டு தகவல்"
|
45
|
+
expected: "intiy eṇṇey kpplil tī: vipttu kuṟitt eccrikkaiyai kppl atikārikḷ puṟkkṇittṉr - ilṅkai kōrṭṭu tkvl"
|
46
|
+
|
47
|
+
map:
|
48
|
+
|
49
|
+
rules:
|
50
|
+
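# Contextual rules for the anusvāra (see the "notes" section above): it is romanized according to the class of the consonant that follows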
|
51
|
+
- pattern: \u0B82(?=[கங])
|
52
|
+
result: ṅ
|
53
|
+
- pattern: \u0B82(?=[சஜஞ])
|
54
|
+
result: ñ
|
55
|
+
- pattern: \u0B82(?=[டண])
|
56
|
+
result: ṇ
|
57
|
+
- pattern: \u0B82(?=[தநன])
|
58
|
+
result: n
|
59
|
+
- pattern: \u0B82(?=[பம])
|
60
|
+
result: m
|
61
|
+
|
62
|
+
characters:
|
63
|
+
'அ': 'a'
|
64
|
+
'ஆ': 'ā'
|
65
|
+
'இ': 'i'
|
66
|
+
'ஈ': 'ī'
|
67
|
+
'உ': 'u'
|
68
|
+
'ஊ': 'ū'
|
69
|
+
|
70
|
+
'எ': 'e'
|
71
|
+
'ஏ': 'ē'
|
72
|
+
'ஐ': 'ai'
|
73
|
+
|
74
|
+
'ஒ': 'o'
|
75
|
+
'ஓ': 'ŏ'
|
76
|
+
'ஔ': 'au'
|
77
|
+
|
78
|
+
# II. Consonants (see Note 2)
|
79
|
+
# Gutturals
|
80
|
+
'க': 'k'
|
81
|
+
'ங': 'ṅ'
|
82
|
+
|
83
|
+
# Palatals
|
84
|
+
'ச': 'c'
|
85
|
+
'ஜ': 'j'
|
86
|
+
'ஞ': 'ñ'
|
87
|
+
|
88
|
+
# Cerebrals
|
89
|
+
'ட': 'ṭ'
|
90
|
+
'ண': 'ṇ'
|
91
|
+
|
92
|
+
# Dentals
|
93
|
+
'த': 't'
|
94
|
+
'ந': 'n'
|
95
|
+
'ன': 'ṉ'
|
96
|
+
|
97
|
+
# Labials
|
98
|
+
'ப': 'p'
|
99
|
+
'ம': 'm'
|
100
|
+
|
101
|
+
# Semivowels
|
102
|
+
'ய': 'y'
|
103
|
+
'ர': 'r'
|
104
|
+
'ற': 'ṟ'
|
105
|
+
'ல': 'l'
|
106
|
+
'ள': 'ḷ'
|
107
|
+
'ழ': 'ẕ'
|
108
|
+
|
109
|
+
|
110
|
+
# Sibilants
|
111
|
+
'வ': 'v'
|
112
|
+
'ஶ': 'ś'
|
113
|
+
'ஷ': 'ṣ'
|
114
|
+
'ஸ': 's'
|
115
|
+
|
116
|
+
|
117
|
+
# Aspirate
|
118
|
+
'ஹ': 'h'
|
119
|
+
|
120
|
+
|
121
|
+
# Bisarga
|
122
|
+
'ஃ': 'ḥ'
|
123
|
+
|
124
|
+
# Anusvāra
|
125
|
+
'ஂ': 'ṃ'
|
126
|
+
|
127
|
+
# Medials # Needed for connecting consonants
|
128
|
+
|
129
|
+
'ா': 'ā'
|
130
|
+
'ி': 'i'
|
131
|
+
'ீ': 'ī'
|
132
|
+
'ு': 'u'
|
133
|
+
'ூ': 'ū'
|
134
|
+
|
135
|
+
'ൃ': "ṛ"
|
136
|
+
'ൄ': "ṝ"
|
137
|
+
|
138
|
+
|
139
|
+
'ெ': "e"
|
140
|
+
'ே': "ē"
|
141
|
+
'ை': "ai"
|
142
|
+
'ொ': 'o'
|
143
|
+
'ோ': 'ō'
|
144
|
+
'ௌ': 'au'
|
145
|
+
|
146
|
+
|
147
|
+
'ൺ': 'n'
|
148
|
+
'ൻ': 'ṇ'
|
149
|
+
|
150
|
+
'्': ''
|
151
|
+
'്': ''
|
152
|
+
'்': ''
|
153
|
+
'ൗ': ''
|
154
|
+
"": '' # no need for zero with joiner
|
155
|
+
"": '' # no need for zero with non joiner
|