interscript 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. checksums.yaml +4 -4
  2. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  3. data/lib/interscript.rb +5 -1
  4. data/lib/interscript/fs.rb +3 -1
  5. data/lib/interscript/mapping.rb +2 -2
  6. data/lib/interscript/opal.rb +5 -1
  7. data/lib/interscript/opal/maps.js.erb +7 -4
  8. data/lib/interscript/version.rb +1 -1
  9. data/maps/acadsin-zho-Hani-Latn-2002.yaml +1 -1
  10. data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
  11. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
  14. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +1 -1
  15. data/maps/{alalc-bel-cyrl-latn-1997.yaml → alalc-bel-Cyrl-Latn-1997.yaml} +2 -2
  16. data/maps/alalc-ell-Grek-Latn-1997.yaml +2 -3
  17. data/maps/alalc-ell-Grek-Latn-2010.yaml +2 -3
  18. data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
  19. data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -2
  20. data/maps/alalc-kor-Hang-Latn-1997.yaml +1 -1
  21. data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
  22. data/maps/{alalc-mkd-cyrl-latn-1997.yaml → alalc-mkd-Cyrl-Latn-1997.yaml} +0 -0
  23. data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
  24. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +1 -2
  25. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +2 -2
  26. data/maps/{alalc-srp-cyrl-latn-2013.yaml → alalc-srp-Cyrl-Latn-2013.yaml} +0 -0
  27. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -1
  28. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -2
  29. data/maps/bgn-kor-Hang-Latn-1943.yaml +1 -1
  30. data/maps/bgn-kor-Kore-Latn-1943.yaml +1 -1
  31. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
  32. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
  33. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +5 -5
  34. data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} +0 -0
  35. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +3 -4
  36. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -1
  37. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -1
  38. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +17 -17
  39. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +2 -2
  40. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +2 -2
  41. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
  42. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
  43. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -1
  44. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -1
  45. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  46. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  47. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  48. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
  49. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  50. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  51. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  52. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  53. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  54. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  55. data/maps/by-bel-Cyrl-Latn-1998.yaml +4 -4
  56. data/maps/by-bel-Cyrl-Latn-2007.yaml +3 -3
  57. data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
  58. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +4 -5
  59. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +4 -5
  60. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -1
  61. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -1
  62. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -1
  63. data/maps/{gki-bel-cyrl-latn-1992.yaml → gki-bel-Cyrl-Latn-1992.yaml} +1 -1
  64. data/maps/{gki-bel-cyrl-latn-2000.yaml → gki-bel-Cyrl-Latn-2000.yaml} +1 -1
  65. data/maps/{gost-rus-cyrl-latn-16876-71-1983.yaml → gost-rus-Cyrl-Latn-16876-71-1983.yaml} +1 -1
  66. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -5
  67. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -4
  68. data/maps/icao-per-Arab-Latn-9303.yaml +0 -1
  69. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -1
  70. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -1
  71. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  72. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +4 -5
  73. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +1 -2
  74. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -1
  75. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
  76. data/maps/kp-kor-Hang-Latn-2002.yaml +4 -4
  77. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +2 -2
  78. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +4 -4
  79. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +4 -4
  80. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +1 -2
  81. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +4 -4
  82. data/maps/nil-kor-Hang-Hang-jamo.yaml +3 -3
  83. data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
  84. data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
  85. data/maps/odni-bul-Cyrl-Latn-2015.yaml +3 -3
  86. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  87. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -1
  88. data/maps/{odni-mkd-cyrl-latn-2015.yaml → odni-mkd-Cyrl-Latn-2015.yaml} +0 -0
  89. data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
  90. data/maps/odni-srp-Cyrl-Latn-2015.yaml +2 -2
  91. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  92. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -2
  93. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +4 -4
  94. data/maps/royin-tha-Thai-Latn-1968.yaml +4 -4
  95. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +4 -4
  96. data/maps/royin-tha-Thai-Latn-1999.yaml +3 -3
  97. data/maps/{ses-ara-arab-latn-1930.yaml → ses-ara-Arab-Latn-1930.yaml} +7 -3
  98. data/maps/un-ara-Arab-Latn-1971.yaml +16 -4
  99. data/maps/un-ara-Arab-Latn-1972.yaml +14 -7
  100. data/maps/un-ara-Arab-Latn-2017.yaml +56 -19
  101. data/maps/un-bel-Cyrl-Latn-2007.yaml +3 -3
  102. data/maps/un-ell-Grek-Latn-1987-tl.yaml +1 -2
  103. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -1
  104. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +42 -42
  105. data/maps/un-mon-Mong-Latn-2013.yaml +9 -3
  106. data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
  107. data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
  108. data/maps/{un-ukr-cyrl-latn-1998.yaml → un-ukr-Cyrl-Latn-1998.yaml} +1 -1
  109. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
  110. data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
  111. data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
  112. data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -2
  113. data/maps/var-tha-Thai-Thai-phonemic.yaml +5 -5
  114. data/maps/var-tha-Thai-Zsym-ipa.yaml +12 -12
  115. data/maps/var-zho-Hani-Latn-1979.yaml +7 -7
  116. metadata +41 -15
@@ -0,0 +1,175 @@
1
+ ---
2
+ authority_id: bis
3
+ id: 1991
4
+ language: pnj
5
+ source_script: Guru
6
+ destination_script: Latn
7
+ name: Indian script code for information interchange - ISCII - Panjabi(Gurmukhi) Romanization
8
+ #url:
9
+ creation_date: 1991
10
+ description: |
11
+ IS 13194 (1991): Indian script code for information
12
+ interchange - ISCII [LITD 20: Indian Language Technologies
13
+ and Products]
14
+
15
+ notes:
16
+ - |
17
+ Exception: Anusvāra is transliterated by:
18
+
19
+ a) ṅ before gutturals,
20
+ b) ñ before palatals,
21
+ c) ṇ before cerebrals,
22
+ d) n before dentals, and
23
+ e) m before labials.
24
+
25
+ tests:
26
+ - source: "ਸਦਾ ਜਵਾਨ ਰਹੋ"
27
+ expected: "sdā jvāṉ rhō"
28
+ - source: "ਸਵਾਮੀ ਅਗਨੀਵੇਸ਼ ਦੀ ਮੌਤ"
29
+ expected: "svāmī agṉīvēs dī maut"
30
+ - source: "ਇਸ ਲਈ ਖੱਟੀ ਤਲੀ ਚੀਜ਼ , ਫਾਸਟ ਫੁੂਟ, ਤੇਜ਼ ਮਿਰਚ ਮਸਾਲਿਆਂ ਨੂੰ ਛੱਡਕੇ ਚੰਗੇ ਤੇ ਮੌਸਮੀ ਫਲ, ਹਰੀਆਂ ਸ਼ਬਜ਼ੀਆਂ, ਘਰ ਦੀ ਲੱਸੀ ਤੇ ਦਹੀਂ ਤੇ ਔਰਗੈਨਿਕ ਚੀਜ਼ਾਂ ਹੀ ਅਪਣਾਉ"
31
+ expected: "is lī khṭī tlī cīz , phāsṭ phuūṭ, tēz mirc msāliān ṉūṃ chḍkē cṅgē tē mausmī phl, hrīān śbzīān, ghr dī lsī tē dhīn tē aurgaiṉik cīzān hī apṇāu"
32
+ - source: "ਤੁਹਾਡਾ ਚਿਹਰਾ ਸਦਾ ਚੜ੍ਹਦੀ ਕਲਾ ‘ਚ ਰਹਿਣਾ ਤੇ ਹਮੇਸ਼ਾ ਖਿੜੀਆਂ ਰਹਿਣਾ ਤੇ ਠਾਠਾਂ ਮਾਰਦਾ ਸਰੀਰ ਹੀ ਤੁਹਾਡੇ ਜਵਾਨ ਰਹਿਣ ਦੀ ਨਿਸ਼ਾਨੀ ਹੈ"
33
+ expected: "tuhāḍā cihrā sdā cṛahdī klā ‘c rhiṇā tē hmēśā khiṛaīān rhiṇā tē ṭhāṭhān mārdā srīr hī tuhāḍē jvāṉ rhiṇ dī ṉiśāṉī hai"
34
+ - source: "ਧੌਲੇ ਆਉਣਾ ਜਾਂ ਕਹਿ ਲਵੋ, ਵਾਲ ਚਿੱਟੇ ਹੋਣਾ ਬੁੱਢਾਪਾ ਨਹੀਂ ਹੈ।ਪਰ ਉਮਰ ਤਂੋ ਪਹਿਲਾਂ ਮੂੰਹ ਤੋਂ ਨੂਰ ਉੜ ਜਾਣਾ,ਹਮੇਸ਼ਾਂ ਥੱਕਿਆ-2 ਰਹਿਣਾ ,ਬਿਮਾਰ ਰਹਿਣਾ ਅਸਲ ਬੁਢਾਪਾ ਹੈ"
35
+ expected: "dhaulē āuṇā jān khi lvō, vāl ciṭē hōṇā buḍhāpā ṉhīn hai.pr umr tnō philān mūṃh tōn ṉūr uṛa jāṇā,hmēśān thkiā-2 rhiṇā ,bimār rhiṇā asl buḍhāpā hai"
36
+ - source: "ਇਸ ਲਈ ਇਹ ਆਪਾਂ ਨੂੰ ਦੇਖਣਾ ਪਵੇਗਾ ਕਿ ਆਪਾਂ ਕਿਵੇਂ ਤੰਦਰੁਸਤ ਰਹਿਣਾ ਹੈ ਤੇ ਨਿਰੋਗ ਰਹਿਣਾ ਹੈ"
37
+ expected: "is lī ih āpān ṉūṃ dēkhṇā pvēgā ki āpān kivēn tndrust rhiṇā hai tē ṉirōg rhiṇā hai"
38
+ - source: "ਸਮਾਜਿਕ ਕਾਰਕੁੰਨ ਸਵਾਮੀ ਅਗਨੀਵੇਸ ਦਾ ਅੱਜ ਸ਼ਾਮੀਂ ਦਿਹਾਂਤ ਹੋ ਗਿਆ"
39
+ expected: "smājik kārkunṉ svāmī agṉīvēs dā aj sāmīn dihānt hō giā"
40
+ - source: "ਹਰ ਇਨਸਾਨ ਸਦਾ ਸਵਸਥ ਤੇ ਜਵਾਨ ਰਹਿਣਾ ਚਾਹੁੰਦਾ ਹੈ"
41
+ expected: "hr iṉsāṉ sdā svsth tē jvāṉ rhiṇā cāhundā hai"
42
+ - source: "ਜਨਮ ਲੈਣਾ ਤੇ ਮੌਤ ਇੱਕ ਅਟਲ ਸੱਚਾਈ ਹੈ"
43
+ expected: "jṉm laiṇā tē maut ik aṭl scāī hai"
44
+ - source: "ਇਸਦੇ ਉਲਟ ਤੁਹਾਡੀ ਗਲਤ ਜੀਵਨ ਸ਼ੈਲੀ, ਗਲਤ ਖਾਣਾ ਤੇ ਹਮੇਸ਼ਾ ਕੀਮਤੀ ਸਰੀਰ ਪ੍ਰਤੀ ਲਾਪਰਵਾਹੀ ਕਦੇ ਵੀ ਤੁਹਾਨੁੂੰ ਜਵਾਨ ਤੇ ਨਿਰੋਗੀ ਨਹੀਂ ਰੱਖ ਸਕਦੀ"
45
+ expected: "isdē ulṭ tuhāḍī glt jīvṉ śailī, glt khāṇā tē hmēśā kīmtī srīr prtī lāprvāhī kdē vī tuhāṉuūṃ jvāṉ tē ṉirōgī ṉhīn rkh skdī"
46
+
47
+ map:
48
+
49
+ rules:
50
+ # note
51
+ - pattern: \u0A70(?=[ਕਖਖ਼ਗਗ਼ਘਙ]) # ੰ before gutturals
52
+ result: ṅ
53
+ - pattern: \u0A70(?=[ਚਛਜਜ਼ਝਞ]) # ੰ before palatals
54
+ result: ñ
55
+ - pattern: \u0A70(?=[ਟਠਡਢਣ]) # ੰ before cerebrals
56
+ result: ṇ
57
+ - pattern: \u0A70(?=[ਤਥਦਧਨ]) # ੰ before dentals
58
+ result: n
59
+ - pattern: \u0A70(?=[ਪਫਬਭਮ])
60
+ result: m
61
+
62
+
63
+ characters:
64
+ 'ਅ': 'a'
65
+ 'ਆ': 'ā'
66
+ 'ਇ': 'i'
67
+ 'ਈ': 'ī'
68
+ 'ਉ': 'u'
69
+ 'ਊ': 'ū'
70
+ 'ੲ': 'ṛ'
71
+ 'ੳ': 'ṝ'
72
+ 'ਏ': 'ē'
73
+ 'ਐ': 'ai'
74
+
75
+ 'ਓ': 'ŏ'
76
+ 'ਔ': 'au'
77
+
78
+ # II. Consonants (see Note 2)
79
+ # Gutturals
80
+ 'ਕ': 'k'
81
+ 'ਖ': 'kh'
82
+ 'ਗ': 'g'
83
+ 'ਘ': 'gh'
84
+ 'ਙ': 'ṅ'
85
+
86
+ # Palatals
87
+ 'ਚ': 'c'
88
+ 'ਛ': 'ch'
89
+ 'ਜ': 'j'
90
+ 'ਝ': 'jh'
91
+ 'ਞ': 'ñ'
92
+
93
+ # Cerebrals
94
+ 'ਟ': 'ṭ'
95
+ 'ਠ': 'ṭh'
96
+ 'ਡ': 'ḍ'
97
+ 'ਢ': 'ḍh'
98
+ 'ਣ': 'ṇ'
99
+
100
+ # Dentals
101
+ 'ਤ': 't'
102
+ 'ਥ': 'th'
103
+ 'ਦ': 'd'
104
+ 'ੜ': 'ṛa'
105
+ 'ਧ': 'dh'
106
+ 'ਨ': 'ṉ'
107
+
108
+ # Labials
109
+ 'ਪ': 'p'
110
+ 'ਫ': 'ph'
111
+ 'ਬ': 'b'
112
+ 'ਭ': 'bh'
113
+ 'ਮ': 'm'
114
+
115
+ # Semivowels
116
+ 'ਯ': 'y'
117
+ #'य़': 'ẏ'
118
+ 'ਰ': 'r'
119
+ #'ऱ': 'ṟ'
120
+ 'ਲ': 'l'
121
+ 'ਲ਼': 'ḷ'
122
+ #'ऴ': 'ẕ'
123
+
124
+
125
+ 'ਵ': 'v'
126
+
127
+ # Sibilants
128
+ 'ਸ਼': 'ś'
129
+ #'ष': 'ṣ'
130
+ 'ਸ': 's'
131
+
132
+
133
+ # Aspirate
134
+ 'ਹ': 'h'
135
+
136
+
137
+ # Nukta consonants
138
+
139
+ 'क़': 'q'
140
+ 'ਖ਼': 'ḵẖ'
141
+ 'ਗ਼': 'gẖ'
142
+ 'ਜ਼': 'z'
143
+ 'ਫ਼': 'f'
144
+
145
+ # Chandrabindu
146
+ 'ँ': 'm'
147
+
148
+ # Bisarga
149
+ 'ः ': 'ḥ'
150
+ 'ਂ': 'n'
151
+
152
+ # Anusvāra
153
+ 'ੰ': 'ṃ'
154
+
155
+ # Medials # Needed for connecting consonants
156
+ 'ਾ': "ā"
157
+ 'ਿ': "i"
158
+ 'ੀ': "ī"
159
+ 'ੁ': "u"
160
+ 'ੂ': "ū"
161
+ 'ृ': "ṛ"
162
+
163
+
164
+ 'ੇ': "ē"
165
+ 'ੈ': "ai"
166
+
167
+
168
+ 'ੋ': 'ō'
169
+ 'ੌ': 'au'
170
+
171
+ '੍': ''
172
+ '਼': ''
173
+ 'ੱ': ''
174
+ '।': '.'
175
+ "‍": '' # no need for zero with joiner
@@ -0,0 +1,170 @@
1
+ ---
2
+ authority_id: bis
3
+ id: 1991
4
+ language: tel
5
+ source_script: Telu
6
+ destination_script: Latn
7
+ name: Indian script code for information interchange - ISCII - Telugu Romanization
8
+ #url:
9
+ creation_date: 1991
10
+ description: |
11
+ IS 13194 (1991): Indian script code for information
12
+ interchange - ISCII [LITD 20: Indian Language Technologies
13
+ and Products]
14
+
15
+ notes:
16
+ - |
17
+ Exception: Anusvāra is transliterated by:
18
+
19
+ a) ṅ before gutturals,
20
+ b) ñ before palatals,
21
+ c) ṇ before cerebrals,
22
+ d) n before dentals, and
23
+ e) m before labials.
24
+
25
+ tests:
26
+ - source: "ఇప్పుడు ఇదే కోవలో టాలీవుడ్‌లో మరో మల్టీస్టారర్‌ రూపొందనుందని సినీ వర్గాల్లో వార్తలు వినిపిస్తున్నాయి"
27
+ expected: "ippuḍu idē kŏvlŏ ṭālīvuḍlŏ mrŏ mlṭīsṭārr rūpondnundni sinī vrgāllŏ vārtlu vinipistunnāyi"
28
+ - source: "అంటే ఉంటాయి, అయితే అవి చాలా పెద్దవై ఉండాల్సిన అవసరం లేదు అంటున్నారు మమ్ముట్టి"
29
+ expected: "aṇṭē uṇṭāyi, ayitē avi cālā peddvai uṇḍālsin avsrṃ lēdu aṇṭunnāru mmmuṭṭi"
30
+ - source: "ఆ సంతోషాన్ని అభిమానులతో పంచుకున్నారు"
31
+ expected: "ā sntŏṣānni abhimānultŏ pñcukunnāru"
32
+ - source: "కెమెరాను అన్‌బాక్స్‌ చేసే వీడియోను సోషల్‌ మీడియాలో అభిమానులతో పంచుకున్నారు"
33
+ expected: "kemerānu anbāks cēsē vīḍiyŏnu sŏṣl mīḍiyālŏ abhimānultŏ pñcukunnāru"
34
+ - source: "ఇన్నాళ్లకు నిజమయింది. ఇక ఇప్పటి నుంచి దీంతో ఫొటోలు క్లిక్‌ మనిపిస్తా’’ అని ఆ వీడియోలో పేర్కొన్నారు"
35
+ expected: "innāḷlku nijmyindi. ik ippṭi nuñci dīntŏ phoṭŏlu klik mnipistā’’ ani ā vīḍiyŏlŏ pērkonnāru"
36
+ - source: "గవర్నర్‌తో కంగనా భేటీ"
37
+ expected: "gvrnrtŏ kṅgnā bhēṭī"
38
+ - source: "శ్రియ సినిమా సెట్‌లో అడుగుపెట్టి ఆరు నెలలు కావొస్తోంది"
39
+ expected: "śriy sinimā seṭlŏ aḍugupeṭṭi āru nellu kāvostŏndi"
40
+ - source: "ఇప్పుడు తను కోరుకున్న కెమెరా చేతికి రావడంతో త్వరలో మమ్ముట్టి నుంచి స్టన్నింగ్‌ ఫొటోస్‌ రావడం ఖాయం అంటున్నారు ఆయన అభిమానులు"
41
+ expected: "ippuḍu tnu kŏrukunn kemerā cētiki rāvḍntŏ tvrlŏ mmmuṭṭi nuñci sṭnniṅg phoṭŏs rāvḍṃ khāyṃ aṇṭunnāru āyn abhimānulu"
42
+ - source: "ఇప్పుడు ఆ వీడియో వైరల్‌ అయింది. ‘ఆ కెమెరాను కొనాలనేది చాలాకాలంగా నా కల."
43
+ expected: "ippuḍu ā vīḍiyŏ vairl ayindi. ‘ā kemerānu konālnēdi cālākālṅgā nā kl."
44
+ - source: "మరో వైపు ఎన్టీఆర్‌, రామ్‌చరణ్‌ కలిసి ట్రిపుల్‌ ఆర్‌ సినిమాలో నటిస్తున్నారు"
45
+ expected: "mrŏ vaipu enṭīār, rāmcrṇ klisi ṭripul ār sinimālŏ nṭistunnāru"
46
+ map:
47
+
48
+ rules:
49
+ # note
50
+ - pattern: \u0C02(?=[కఖగఘఙ])
51
+ result: ṅ
52
+ - pattern: \u0C02(?=[చఛజఝఞ])
53
+ result: ñ
54
+ - pattern: \u0C02(?=[టఠడఢణ])
55
+ result: ṇ
56
+ - pattern: \u0C02(?=[తథదధన])
57
+ result: n
58
+ - pattern: \u0C02(?=[పఫబభమ])
59
+ result: m
60
+
61
+
62
+ characters:
63
+ 'అ': 'a'
64
+ 'ఆ': 'ā'
65
+ 'ఇ': 'i'
66
+ 'ఈ': 'ī'
67
+ 'ఉ': 'u'
68
+ 'ఊ': 'ū'
69
+ 'ఋ': 'ṛ'
70
+ 'ఌ': 'ḻ'
71
+ 'ౡ': 'ḻ'
72
+ 'ఎ': 'e'
73
+ 'ఏ': 'ē'
74
+ 'ఐ': 'ai'
75
+ 'ఒ': 'o'
76
+ 'ఓ': 'ŏ'
77
+ 'ఔ': 'au'
78
+
79
+ # II. Consonants (see Note 2)
80
+ # Gutturals క ఖ గ ఘ ఙ
81
+ 'క': 'k'
82
+ 'ఖ': 'kh'
83
+ 'గ': 'g'
84
+ 'ఘ': 'gh'
85
+ 'ఙ': 'ṅ'
86
+
87
+ # Palatals చ ఛ జ ఝ ఞ
88
+ 'చ': 'c'
89
+ 'ఛ': 'ch'
90
+ 'జ': 'j'
91
+ 'ఝ': 'jh'
92
+ 'ఞ': 'ñ'
93
+
94
+ # Cerebrals ట ఠ డ ఢ ణ
95
+ 'ట': 'ṭ'
96
+ 'ఠ': 'ṭh'
97
+ 'డ': 'ḍ'
98
+ 'ఢ': 'ḍh'
99
+ 'ణ': 'ṇ'
100
+
101
+ # Dentals త థ ద ధ న
102
+ 'త': 't'
103
+ #'ৎ': 't'
104
+ 'థ': 'th'
105
+ 'ద': 'd'
106
+ 'ధ': 'dh'
107
+ 'న': 'n'
108
+
109
+ # Labials ప ఫ బ భ మ
110
+ 'ప': 'p'
111
+ 'ఫ': 'ph'
112
+ 'బ': 'b'
113
+ 'భ': 'bh'
114
+ 'మ': 'm'
115
+
116
+ # Semivowels య ర ల వ
117
+ 'య': 'y'
118
+ 'ర': 'r'
119
+ 'ఱ': 'ṛ'
120
+ 'ల': 'l'
121
+ 'ళ': 'ḷ'
122
+ 'వ': 'v'
123
+
124
+ # Sibilants శ ష స హ
125
+ 'శ': 'ś'
126
+ 'ష': 'ṣ'
127
+ 'స': 's'
128
+
129
+
130
+
131
+ # Aspirate
132
+ 'హ': 'h'
133
+
134
+
135
+ 'క్ష': 'kṣa'
136
+
137
+
138
+ # Chandrabindu
139
+ 'ঁ': 'm'
140
+
141
+ # Bisarga
142
+ 'ః': 'ḥ'
143
+
144
+ # Anusvāra
145
+ 'ం': 'ṃ'
146
+
147
+ # Medials # Needed for connecting consonants
148
+
149
+ 'ా': 'ā'
150
+ 'ి': 'i'
151
+ 'ీ': 'ī'
152
+ 'ు': 'u'
153
+ 'ూ': 'ū'
154
+ 'ృ': 'ṛ'
155
+ 'ె': 'e'
156
+ 'ే': 'ē'
157
+ 'ై': 'ai'
158
+ 'ొ': 'o'
159
+ 'ో': 'ŏ'
160
+ 'ౌ': 'au'
161
+ '\u09CD': '' # Used for joining
162
+
163
+
164
+ '\u0c4d': '' # virama sign (halanta)
165
+ 'ౕ ': ''
166
+ 'ౖ ': ''
167
+ '्': ''
168
+ '़': ''
169
+ "‍": ''# Used for joining
170
+ "‌": ''# Used for non joining
@@ -0,0 +1,155 @@
1
+ ---
2
+ authority_id: bis
3
+ id: 1991
4
+ language: tml
5
+ source_script: Taml
6
+ destination_script: Latn
7
+ name: Indian script code for information interchange - ISCII - Tamil Romanization
8
+ #url:
9
+ creation_date: 1991
10
+ description: |
11
+ IS 13194 (1991): Indian script code for information
12
+ interchange - ISCII [LITD 20: Indian Language Technologies
13
+ and Products]
14
+
15
+ notes:
16
+ - |
17
+ Exception: Anusvāra is transliterated by:
18
+
19
+ a) ṅ before gutturals,
20
+ b) ñ before palatals,
21
+ c) ṇ before cerebrals,
22
+ d) n before dentals, and
23
+ e) m before labials.
24
+
25
+ tests:
26
+ - source: "இளைஞர்களின் உறுதியான மனநிலையை பிரதிபலிக்கிறது: நீட் தேர்வில் 85-90 சதவீத மாணவர்கள் பங்கேற்பு - ரமேஷ் பொக்ரியால்"
27
+ expected: "iḷaiñrkḷiṉ uṟutiyāṉ mṉnilaiyai pirtiplikkiṟtu: nīṭ tērvil 85-90 ctvīt māṇvrkḷ pṅkēṟpu - rmēṣ pokriyāl"
28
+ - source: "நாடாளுமன்றத்தில் 4 மசோதாக்களை எதிர்க்க காங்கிரஸ் முடிவு - ஜெயராம் ரமேஷ்"
29
+ expected: "nāṭāḷumṉṟttil 4 mcōtākkḷai etirkk kāṅkirs muṭivu - jeyrām rmēṣ"
30
+ - source: "கர்நாடகாவில் மேலும் 9,894 பேருக்கு கொரோனா தொற்று உறுதி"
31
+ expected: "krnāṭkāvil mēlum 9,894 pērukku korōṉā toṟṟu uṟuti"
32
+ - source: "ஐதராபாத்துக்கு கைகொடுக்குமா அதிரடி?"
33
+ expected: "aitrāpāttukku kaikoṭukkumā atirṭi?"
34
+ - source: "அமெரிக்க ஓபன் டென்னிஸ்: இறுதிப்போட்டியில் டொமினிக்-ஸ்வெரேவ்"
35
+ expected: "amerikk ŏpṉ ṭeṉṉis: iṟutippōṭṭiyil ṭomiṉik-sverēv"
36
+ - source: "ஐ.பி.எல். கிரிக்கெட்டில் களம் இறங்கும் அமெரிக்க வீரர்"
37
+ expected: "ai.pi.el. kirikkeṭṭil kḷm iṟṅkum amerikk vīrr"
38
+ - source: "அமெரிக்க ஓபன் டென்னிஸ்; நவோமி ஒசாகா சாம்பியன் பட்டம் வென்றார்"
39
+ expected: "amerikk ŏpṉ ṭeṉṉis; nvōmi ocākā cāmpiyṉ pṭṭm veṉṟār"
40
+ - source: "புதிய கல்விக்கொள்கைக்கு எதிர்ப்பு: முன்னாள் துணைவேந்தர்கள் 20 பேர் பிரதமருக்கு கடிதம்"
41
+ expected: "putiy klvikkoḷkaikku etirppu: muṉṉāḷ tuṇaivēntrkḷ 20 pēr pirtmrukku kṭitm"
42
+ - source: "இந்த ஆண்டு ஐ.பி.எல். கோப்பையை எந்த அணி வெல்லும்? - கெவின் பீட்டர்சன் கணிப்பு"
43
+ expected: "int āṇṭu ai.pi.el. kōppaiyai ent aṇi vellum? - keviṉ pīṭṭrcṉ kṇippu"
44
+ - source: "இந்திய எண்ணெய் கப்பலில் தீ: விபத்து குறித்த எச்சரிக்கையை கப்பல் அதிகாரிகள் புறக்கணித்தனர் - இலங்கை கோர்ட்டு தகவல்"
45
+ expected: "intiy eṇṇey kpplil tī: vipttu kuṟitt eccrikkaiyai kppl atikārikḷ puṟkkṇittṉr - ilṅkai kōrṭṭu tkvl"
46
+
47
+ map:
48
+
49
+ rules:
50
+ # note
51
+ - pattern: \u0B82(?=[கங])
52
+ result: ṅ
53
+ - pattern: \u0B82(?=[சஜஞ])
54
+ result: ñ
55
+ - pattern: \u0B82(?=[டண])
56
+ result: ṇ
57
+ - pattern: \u0B82(?=[தநன])
58
+ result: n
59
+ - pattern: \u0B82(?=[பம])
60
+ result: m
61
+
62
+ characters:
63
+ 'அ': 'a'
64
+ 'ஆ': 'ā'
65
+ 'இ': 'i'
66
+ 'ஈ': 'ī'
67
+ 'உ': 'u'
68
+ 'ஊ': 'ū'
69
+
70
+ 'எ': 'e'
71
+ 'ஏ': 'ē'
72
+ 'ஐ': 'ai'
73
+
74
+ 'ஒ': 'o'
75
+ 'ஓ': 'ŏ'
76
+ 'ஔ': 'au'
77
+
78
+ # II. Consonants (see Note 2)
79
+ # Gutturals
80
+ 'க': 'k'
81
+ 'ங': 'ṅ'
82
+
83
+ # Palatals
84
+ 'ச': 'c'
85
+ 'ஜ': 'j'
86
+ 'ஞ': 'ñ'
87
+
88
+ # Cerebrals
89
+ 'ட': 'ṭ'
90
+ 'ண': 'ṇ'
91
+
92
+ # Dentals
93
+ 'த': 't'
94
+ 'ந': 'n'
95
+ 'ன': 'ṉ'
96
+
97
+ # Labials
98
+ 'ப': 'p'
99
+ 'ம': 'm'
100
+
101
+ # Semivowels
102
+ 'ய': 'y'
103
+ 'ர': 'r'
104
+ 'ற': 'ṟ'
105
+ 'ல': 'l'
106
+ 'ள': 'ḷ'
107
+ 'ழ': 'ẕ'
108
+
109
+
110
+ # Sibilants
111
+ 'வ': 'v'
112
+ 'ஶ': 'ś'
113
+ 'ஷ': 'ṣ'
114
+ 'ஸ': 's'
115
+
116
+
117
+ # Aspirate
118
+ 'ஹ': 'h'
119
+
120
+
121
+ # Bisarga
122
+ 'ஃ': 'ḥ'
123
+
124
+ # Anusvāra
125
+ 'ஂ': 'ṃ'
126
+
127
+ # Medials # Needed for connecting consonants
128
+
129
+ 'ா': 'ā'
130
+ 'ி': 'i'
131
+ 'ீ': 'ī'
132
+ 'ு': 'u'
133
+ 'ூ': 'ū'
134
+
135
+ 'ൃ': "ṛ"
136
+ 'ൄ': "ṝ"
137
+
138
+
139
+ 'ெ': "e"
140
+ 'ே': "ē"
141
+ 'ை': "ai"
142
+ 'ொ': 'o'
143
+ 'ோ': 'ō'
144
+ 'ௌ': 'au'
145
+
146
+
147
+ 'ൺ': 'n'
148
+ 'ൻ': 'ṇ'
149
+
150
+ '्': ''
151
+ '്': ''
152
+ '்': ''
153
+ 'ൗ': ''
154
+ "‍": '' # no need for zero with joiner
155
+ "‌": '' # no need for zero with non joiner